// stb_truetype.h - v1.10 - public domain
// authored from 2009-2015 by Sean Barrett / RAD Game Tools
//
//   This library processes TrueType files:
//        parse files
//        extract glyph metrics
//        extract glyph shapes
//        render glyphs to one-channel bitmaps with antialiasing (box filter)
//
//   Todo:
//        non-MS cmaps
//        crashproof on bad data
//        hinting? (no longer patented)
//        cleartype-style AA?
//        optimize: use simple memory allocator for intermediates
//        optimize: build edge-list directly from curves
//        optimize: rasterize directly from curves?
//
// ADDITIONAL CONTRIBUTORS
//
//   Mikko Mononen: compound shape support, more cmap formats
//   Tor Andersson: kerning, subpixel rendering
//
//   Misc other:
//       Ryan Gordon
//       Simon Glass
//
//   Bug/warning reports/fixes:
//       "Zer" on mollyrocket (with fix)
//       Cass Everitt
//       stoiko (Haemimont Games)
//       Brian Hook 
//       Walter van Niftrik
//       David Gow
//       David Given
//       Ivan-Assen Ivanov
//       Anthony Pesch
//       Johan Duparc
//       Hou Qiming
//       Fabian "ryg" Giesen
//       Martins Mozeiko
//       Cap Petschulat
//       Omar Cornut
//       github:aloucks
//       Peter LaValle
//       Sergey Popov
//       Giumo X. Clanjor
//       Higor Euripedes
//       Thomas Fields
//       Derek Vinyard
//
// VERSION HISTORY
//
//   1.10 (2016-04-02) user-defined fabs(); rare memory leak; remove duplicate typedef
//   1.09 (2016-01-16) warning fix; avoid crash on outofmem; use allocation userdata properly
//   1.08 (2015-09-13) document stbtt_Rasterize(); fixes for vertical & horizontal edges
//   1.07 (2015-08-01) allow PackFontRanges to accept arrays of sparse codepoints;
//                     variant PackFontRanges to pack and render in separate phases;
//                     fix stbtt_GetFontOffsetForIndex (never worked for non-0 input?);
//                     fixed an assert() bug in the new rasterizer
//                     replace assert() with STBTT_assert() in new rasterizer
//   1.06 (2015-07-14) performance improvements (~35% faster on x86 and x64 on test machine)
//                     also more precise AA rasterizer, except if shapes overlap
//                     remove need for STBTT_sort
//   1.05 (2015-04-15) fix misplaced definitions for STBTT_STATIC
//   1.04 (2015-04-15) typo in example
//   1.03 (2015-04-12) STBTT_STATIC, fix memory leak in new packing, various fixes
//
//   Full history can be found at the end of this file.
//
// LICENSE
//
//   This software is dual-licensed to the public domain and under the following
//   license: you are granted a perpetual, irrevocable license to copy, modify,
//   publish, and distribute this file as you see fit.
//
// USAGE
//
//   Include this file in whatever places need to refer to it. In ONE C/C++
//   file, write:
//      #define STB_TRUETYPE_IMPLEMENTATION
//   before the #include of this file. This expands out the actual
//   implementation into that C/C++ file.
//
//   To make the implementation private to the file that generates the implementation,
//      #define STBTT_STATIC
//
//   Simple 3D API (don't ship this, but it's fine for tools and quick start)
//           stbtt_BakeFontBitmap()               -- bake a font to a bitmap for use as texture
//           stbtt_GetBakedQuad()                 -- compute quad to draw for a given char
//
//   Improved 3D API (more shippable):
//           #include "stb_rect_pack.h"           -- optional, but you really want it
//           stbtt_PackBegin()
//           stbtt_PackSetOversampling()          -- for improved quality on small fonts
//           stbtt_PackFontRanges()               -- pack and renders
//           stbtt_PackEnd()
//           stbtt_GetPackedQuad()
//
//   "Load" a font file from a memory buffer (you have to keep the buffer loaded)
//           stbtt_InitFont()
//           stbtt_GetFontOffsetForIndex()        -- use for TTC font collections
//
//   Render a unicode codepoint to a bitmap
//           stbtt_GetCodepointBitmap()           -- allocates and returns a bitmap
//           stbtt_MakeCodepointBitmap()          -- renders into bitmap you provide
//           stbtt_GetCodepointBitmapBox()        -- how big the bitmap must be
//
//   Character advance/positioning
//           stbtt_GetCodepointHMetrics()
//           stbtt_GetFontVMetrics()
//           stbtt_GetCodepointKernAdvance()
//
//   Starting with version 1.06, the rasterizer was replaced with a new,
//   faster and generally-more-precise rasterizer. The new rasterizer more
//   accurately measures pixel coverage for anti-aliasing, except in the case
//   where multiple shapes overlap, in which case it overestimates the AA pixel
//   coverage. Thus, anti-aliasing of intersecting shapes may look wrong. If
//   this turns out to be a problem, you can re-enable the old rasterizer with
//        #define STBTT_RASTERIZER_VERSION 1
//   which will incur about a 15% speed hit.
//
// ADDITIONAL DOCUMENTATION
//
//   Immediately after this block comment are a series of sample programs.
//
//   After the sample programs is the "header file" section. This section
//   includes documentation for each API function.
//
//   Some important concepts to understand to use this library:
//
//      Codepoint
//         Characters are defined by unicode codepoints, e.g. 65 is
//         uppercase A, 231 is lowercase c with a cedilla, 0x307e is
//         the hiragana for "ma".
//
//      Glyph
//         A visual character shape (every codepoint is rendered as
//         some glyph)
//
//      Glyph index
//         A font-specific integer ID representing a glyph
//
//      Baseline
//         Glyph shapes are defined relative to a baseline, which is the
//         bottom of uppercase characters. Characters extend both above
//         and below the baseline.
//
//      Current Point
//         As you draw text to the screen, you keep track of a "current point"
//         which is the origin of each character. The current point's vertical
//         position is the baseline. Even "baked fonts" use this model.
//
//      Vertical Font Metrics
//         The vertical qualities of the font, used to vertically position
//         and space the characters. See docs for stbtt_GetFontVMetrics.
//
//      Font Size in Pixels or Points
//         The preferred interface for specifying font sizes in stb_truetype
//         is to specify how tall the font's vertical extent should be in pixels.
//         If that sounds good enough, skip the next paragraph.
//
//         Most font APIs instead use "points", which are a common typographic
//         measurement for describing font size, defined as 72 points per inch.
//         stb_truetype provides a point API for compatibility. However, true
//         "per inch" conventions don't make much sense on computer displays
//         since different monitors have different numbers of pixels per
//         inch. For example, Windows traditionally uses a convention that
//         there are 96 pixels per inch, thus making 'inch' measurements have
//         nothing to do with inches, and thus effectively defining a point to
//         be 1.333 pixels. Additionally, the TrueType font data provides
//         an explicit scale factor to scale a given font's glyphs to points,
//         but the author has observed that this scale factor is often wrong
//         for non-commercial fonts, thus making fonts scaled in points
//         according to the TrueType spec incoherently sized in practice.
//
// ADVANCED USAGE
//
//   Quality:
//
//    - Use the functions with Subpixel at the end to allow your characters
//      to have subpixel positioning. Since the font is anti-aliased, not
//      hinted, this is very important for quality. (This is not possible with
//      baked fonts.)
//
//    - Kerning is now supported, and if you're supporting subpixel rendering
//      then kerning is worth using to give your text a polished look.
//
//   Performance:
//
//    - Convert Unicode codepoints to glyph indexes and operate on the glyphs;
//      if you don't do this, stb_truetype is forced to do the conversion on
//      every call.
//
//    - There are a lot of memory allocations. We should modify it to take
//      a temp buffer and allocate from the temp buffer (without freeing),
//      should help performance a lot.
//
// NOTES
//
//   The system uses the raw data found in the .ttf file without changing it
//   and without building auxiliary data structures. This is a bit inefficient
//   on little-endian systems (the data is big-endian), but assuming you're
//   caching the bitmaps or glyph shapes this shouldn't be a big deal.
//
//   It appears to be very hard to programmatically determine what font a
//   given file is in a general way. I provide an API for this, but I don't
//   recommend it.
//
//
// SOURCE STATISTICS (based on v0.6c, 2050 LOC)
//
//   Documentation & header file        520 LOC  \___ 660 LOC documentation
//   Sample code                        140 LOC  /
//   Truetype parsing                   620 LOC  ---- 620 LOC TrueType
//   Software rasterization             240 LOC  \                           .
//   Curve tesselation                  120 LOC   \__ 550 LOC Bitmap creation
//   Bitmap management                  100 LOC   /
//   Baked bitmap interface              70 LOC  /
//   Font name matching & access        150 LOC  ---- 150 
//   C runtime library abstraction       60 LOC  ----  60
//
//
// PERFORMANCE MEASUREMENTS FOR 1.06:
//
//                      32-bit     64-bit
//   Previous release:  8.83 s     7.68 s
//   Pool allocations:  7.72 s     6.34 s
//   Inline sort     :  6.54 s     5.65 s
//   New rasterizer  :  5.63 s     5.00 s

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
////
////  SAMPLE PROGRAMS
////
//
//  Incomplete text-in-3d-api example, which draws quads properly aligned to be lossless
//
#if 0
#define STB_TRUETYPE_IMPLEMENTATION  // force following include to generate implementation
#include "stb_truetype.h"

// raw font file contents, filled by fread() in my_stbtt_initfont
unsigned char ttf_buffer[1<<20];
// 512x512 one-channel bitmap that stbtt_BakeFontBitmap renders into
unsigned char temp_bitmap[512*512];

stbtt_bakedchar cdata[96]; // one entry per baked char: 96 chars starting at 32 (ASCII 32..126 is 95 glyphs)
// GL texture object that temp_bitmap is uploaded to
GLuint ftex;

// Loads times.ttf, bakes 96 characters starting at codepoint 32 into a 512x512
// bitmap, and uploads that bitmap as a GL_ALPHA texture bound to ftex.
// Returns without creating the texture if the font file cannot be opened.
void my_stbtt_initfont(void)
{
   FILE *f = fopen("c:/windows/fonts/times.ttf", "rb");
   if (!f)
      return; // never hand a NULL stream to fread()
   fread(ttf_buffer, 1, 1<<20, f);
   fclose(f);
   stbtt_BakeFontBitmap(ttf_buffer,0, 32.0, temp_bitmap,512,512, 32,96, cdata); // no guarantee this fits!
// can free ttf_buffer at this point
   glGenTextures(1, &ftex);
   glBindTexture(GL_TEXTURE_2D, ftex);
   glTexImage2D(GL_TEXTURE_2D, 0, GL_ALPHA, 512,512, 0, GL_ALPHA, GL_UNSIGNED_BYTE, temp_bitmap);
// can free temp_bitmap at this point
   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
}

// Draws `text` starting at (x,y) as one textured quad per character, using the
// glyphs baked into cdata/ftex. Characters below 32 or not below 128 are skipped.
void my_stbtt_print(float x, float y, char *text)
{
// assume orthographic projection with units = screen pixels, origin at top left
   glEnable(GL_TEXTURE_2D);
   glBindTexture(GL_TEXTURE_2D, ftex);
   glBegin(GL_QUADS);
   while (*text) {
      if (*text >= 32 && *text < 128) {
         stbtt_aligned_quad q;
         stbtt_GetBakedQuad(cdata, 512,512, *text-32, &x,&y,&q,1);//1=opengl & d3d10+,0=d3d9
         // (x0,y0,s0,t0) is the quad's top-left and (x1,y1,s1,t1) its bottom-right,
         // so every vertex is paired with the texcoord of the same corner.
         glTexCoord2f(q.s0,q.t0); glVertex2f(q.x0,q.y0);
         glTexCoord2f(q.s1,q.t0); glVertex2f(q.x1,q.y0);
         glTexCoord2f(q.s1,q.t1); glVertex2f(q.x1,q.y1);
         glTexCoord2f(q.s0,q.t1); glVertex2f(q.x0,q.y1);
      }
      ++text;
   }
   glEnd();
}
#endif
//
//
//////////////////////////////////////////////////////////////////////////////
//
// Complete program (this compiles): get a single bitmap, print as ASCII art
//
#if 0
#include <stdio.h>
#include <stdlib.h>  // atoi
#define STB_TRUETYPE_IMPLEMENTATION  // force following include to generate implementation
#include "stb_truetype.h"

// stb_truetype takes font data as unsigned char
unsigned char ttf_buffer[1<<25];

// Renders one codepoint as ASCII art.
//   argv[1]: codepoint as a decimal number (default 'a')
//   argv[2]: pixel height (default 20)
//   argv[3]: path of the font file (default arialbd.ttf)
// Returns 0 on success, 1 if the font cannot be opened or initialised.
int main(int argc, char **argv)
{
   stbtt_fontinfo font;
   unsigned char *bitmap;
   FILE *f;
   int w,h,i,j,c = (argc > 1 ? atoi(argv[1]) : 'a'), s = (argc > 2 ? atoi(argv[2]) : 20);

   f = fopen(argc > 3 ? argv[3] : "c:/windows/fonts/arialbd.ttf", "rb");
   if (!f) {
      fprintf(stderr, "could not open font file\n");
      return 1;
   }
   fread(ttf_buffer, 1, 1<<25, f);
   fclose(f);

   // stbtt_InitFont returns 0 on failure
   if (!stbtt_InitFont(&font, ttf_buffer, stbtt_GetFontOffsetForIndex(ttf_buffer,0))) {
      fprintf(stderr, "could not initialise font\n");
      return 1;
   }
   bitmap = stbtt_GetCodepointBitmap(&font, 0,stbtt_ScaleForPixelHeight(&font, s), c, &w, &h, 0,0);

   // map each 8-bit coverage value onto one of 8 characters (top 3 bits)
   for (j=0; j < h; ++j) {
      for (i=0; i < w; ++i)
         putchar(" .:ioVM@"[bitmap[j*w+i]>>5]);
      putchar('\n');
   }
   return 0;
}
#endif
//
// Output:
//
//     .ii.
//    @@@@@@.
//   V@Mio@@o
//   :i.  V@V
//     :oM@@M
//   :@@@MM@M
//   @@o  o@M
//  :@@.  M@M
//   @@@o@@@@
//   :M@@V:@@.
//  
//////////////////////////////////////////////////////////////////////////////
// 
// Complete program: print "Hello World!" banner, with bugs
//
#if 0
// stb_truetype takes font data as unsigned char
unsigned char buffer[24<<20];
unsigned char screen[20][79];

// Renders a short string into the 20x79 `screen` buffer with subpixel
// positioning and kerning, then prints the buffer as ASCII art.
// Returns 0 on success, 1 if the font cannot be opened or initialised.
int main(int arg, char **argv)
{
   stbtt_fontinfo font;
   FILE *f;
   int i,j,ascent,baseline,ch=0;
   float scale, xpos=2; // leave a little padding in case the character extends left
   char *text = "Heljo World!"; // intentionally misspelled to show 'lj' brokenness

   f = fopen("c:/windows/fonts/arialbd.ttf", "rb");
   if (!f)
      return 1; // never hand a NULL stream to fread()
   fread(buffer, 1, 1000000, f);
   fclose(f);
   if (!stbtt_InitFont(&font, buffer, 0))
      return 1; // stbtt_InitFont returns 0 on failure

   scale = stbtt_ScaleForPixelHeight(&font, 15);
   stbtt_GetFontVMetrics(&font, &ascent,0,0);
   baseline = (int) (ascent*scale);

   while (text[ch]) {
      int advance,lsb,x0,y0,x1,y1;
      // fractional part of the pen position, passed on as the subpixel shift
      float x_shift = xpos - (float) floor(xpos);
      stbtt_GetCodepointHMetrics(&font, text[ch], &advance, &lsb);
      stbtt_GetCodepointBitmapBoxSubpixel(&font, text[ch], scale,scale,x_shift,0, &x0,&y0,&x1,&y1);
      stbtt_MakeCodepointBitmapSubpixel(&font, &screen[baseline + y0][(int) xpos + x0], x1-x0,y1-y0, 79, scale,scale,x_shift,0, text[ch]);
// note that this stomps the old data, so where character boxes overlap (e.g. 'lj') it's wrong
// because this API is really for baking character bitmaps into textures. if you want to render
// a sequence of characters, you really need to render each bitmap to a temp buffer, then
// "alpha blend" that into the working buffer
      xpos += (advance * scale);
      if (text[ch+1])
         xpos += scale*stbtt_GetCodepointKernAdvance(&font, text[ch],text[ch+1]);
      ++ch;
   }

   for (j=0; j < 20; ++j) {
      for (i=0; i < 78; ++i)
         putchar(" .:ioVM@"[screen[j][i]>>5]);
      putchar('\n');
   }

   return 0;
}
#endif

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
////
////   INTEGRATION WITH YOUR CODEBASE
////
////   The following sections allow you to supply alternate definitions
////   of C library functions used by stb_truetype.

#ifdef STB_TRUETYPE_IMPLEMENTATION
// #define your own (u)stbtt_int8/16/32 before including to override this
#ifndef stbtt_uint8
// Default sized-integer aliases, built from the native C types. They are
// skipped entirely when a macro named stbtt_uint8 is already defined.
typedef unsigned char stbtt_uint8;
typedef signed char stbtt_int8;
typedef unsigned short stbtt_uint16;
typedef signed short stbtt_int16;
typedef unsigned int stbtt_uint32;
typedef signed int stbtt_int32;
#endif

// Compile-time size checks: the array length becomes -1 (a compile error)
// if stbtt_int32 is not 4 bytes or stbtt_int16 is not 2 bytes.
typedef char stbtt__check_size32[sizeof(stbtt_int32) == 4 ? 1 : -1];
typedef char stbtt__check_size16[sizeof(stbtt_int16) == 2 ? 1 : -1];

// #define your own STBTT_ifloor/STBTT_iceil() to avoid math.h
// Note: one guard covers both macros, so defining STBTT_ifloor also suppresses
// the default STBTT_iceil; override the two together.
#ifndef STBTT_ifloor
#include <math.h>
#define STBTT_ifloor(x)   ((int) floor(x))
#define STBTT_iceil(x)    ((int) ceil(x))
#endif

// #define your own STBTT_sqrt() to avoid math.h
#ifndef STBTT_sqrt
#include <math.h>
#define STBTT_sqrt(x)      sqrt(x)
#endif

// #define your own STBTT_fabs() to avoid math.h
#ifndef STBTT_fabs
#include <math.h>
#define STBTT_fabs(x)      fabs(x)
#endif

// #define your own functions "STBTT_malloc" / "STBTT_free" to avoid malloc.h
// Defaults use the C heap. The second argument (u) is evaluated and then
// discarded, so the default malloc/free wrappers ignore it. Both macros sit
// under the single STBTT_malloc guard, so override them together.
#ifndef STBTT_malloc
   #include <stdlib.h>
   #define STBTT_malloc(x,u)  ((void)(u),malloc(x))
   #define STBTT_free(x,u)    ((void)(u),free(x))
#endif

// #define your own STBTT_assert() to avoid assert.h
#ifndef STBTT_assert
   #include <assert.h>
   #define STBTT_assert(x)    assert(x)
#endif

// #define your own STBTT_strlen() to avoid string.h
#ifndef STBTT_strlen
#include <string.h>
#define STBTT_strlen(x)    strlen(x)
#endif

// #define your own STBTT_memcpy AND STBTT_memset to avoid string.h; both sit
// under the single STBTT_memcpy guard. <string.h> is the standard C header
// that declares memcpy/memset (<memory.h> is a non-standard legacy header
// that some toolchains do not ship).
#ifndef STBTT_memcpy
#include <string.h>
#define STBTT_memcpy       memcpy
#define STBTT_memset       memset
#endif
#endif

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
////
////   INTERFACE
////
////

#ifndef __STB_INCLUDE_STB_TRUETYPE_H__
#define __STB_INCLUDE_STB_TRUETYPE_H__

// STBTT_DEF is the storage-class prefix on every public prototype below:
// `static` when STBTT_STATIC is defined, `extern` otherwise.
#ifdef STBTT_STATIC
#define STBTT_DEF static
#else
#define STBTT_DEF extern
#endif

#ifdef __cplusplus
extern "C"
{
#endif

	//////////////////////////////////////////////////////////////////////////////
	//
	// TEXTURE BAKING API
	//
	// If you use this API, you only have to call two functions ever.
	//

	// Per-character record filled in by stbtt_BakeFontBitmap (the caller
	// allocates the array) and read by stbtt_GetBakedQuad.
	typedef struct
	{
		// coordinates of bbox in bitmap
		unsigned short x0;
		unsigned short y0;
		unsigned short x1;
		unsigned short y1;
		// NOTE(review): presumably the glyph offset from the current point and
		// the horizontal advance -- confirm against stbtt_GetBakedQuad.
		float xoff;
		float yoff;
		float xadvance;
	} stbtt_bakedchar;

	STBTT_DEF int stbtt_BakeFontBitmap( const unsigned char* data, int offset, // font location (use offset=0 for plain .ttf)
	                                    float pixel_height, // height of font in pixels
	                                    unsigned char* pixels, int pw, int ph, // bitmap to be filled in
	                                    int first_char, int num_chars, // characters to bake
	                                    stbtt_bakedchar* chardata ); // you allocate this, it's num_chars long
	// if return is positive, the first unused row of the bitmap
	// if return is negative, returns the negative of the number of characters that fit
	// if return is 0, no characters fit and no rows were used
	// This uses a very crappy packing.

	// Screen-space quad plus texture coordinates for drawing one character.
	typedef struct
	{
		// top-left
		float x0;
		float y0;
		float s0;
		float t0;
		// bottom-right
		float x1;
		float y1;
		float s1;
		float t1;
	} stbtt_aligned_quad;

	STBTT_DEF void stbtt_GetBakedQuad( stbtt_bakedchar* chardata, int pw, int ph, // same data as above
	                                   int char_index, // character to display
	                                   float* xpos, float* ypos, // pointers to current position in screen pixel space
	                                   stbtt_aligned_quad* q, // output: quad to draw
	                                   int opengl_fillrule ); // true if opengl fill rule; false if DX9 or earlier
	// Call GetBakedQuad with char_index = 'character - first_char', and it
	// creates the quad you need to draw and advances the current position.
	//
	// The coordinate system used assumes y increases downwards.
	//
	// Characters will extend both above and below the current position;
	// see discussion of "BASELINE" above.
	//
	// It's inefficient; you might want to c&p it and optimize it.

	//////////////////////////////////////////////////////////////////////////////
	//
	// NEW TEXTURE BAKING API
	//
	// This provides options for packing multiple fonts into one atlas, not
	// perfectly but better than nothing.

	// Per-character record written by the stbtt_PackFontRange(s) functions and
	// passed to stbtt_GetPackedQuad.
	typedef struct
	{
		// coordinates of bbox in bitmap
		unsigned short x0;
		unsigned short y0;
		unsigned short x1;
		unsigned short y1;
		float xoff;
		float yoff;
		float xadvance;
		// NOTE(review): presumably a second offset pair for the opposite quad
		// corner -- confirm against stbtt_GetPackedQuad.
		float xoff2;
		float yoff2;
	} stbtt_packedchar;

	// Forward declarations: the prototypes below take pointers to these structs
	// before the struct bodies appear later in this header.
	typedef struct stbtt_pack_context stbtt_pack_context;
	typedef struct stbtt_fontinfo stbtt_fontinfo;
#ifndef STB_RECT_PACK_VERSION
// Only declared when STB_RECT_PACK_VERSION is not defined.
// NOTE(review): presumably stb_rect_pack.h defines that macro along with the
// real stbrp_rect -- confirm.
typedef struct stbrp_rect stbrp_rect;
#endif

	STBTT_DEF int stbtt_PackBegin( stbtt_pack_context* spc, unsigned char* pixels, int width, int height, int stride_in_bytes, int padding, void* alloc_context );
	// Initializes a packing context stored in the passed-in stbtt_pack_context.
	// Future calls using this context will pack characters into the bitmap passed
	// in here: a 1-channel bitmap that is width x height. stride_in_bytes is
	// the distance from one row to the next (or 0 to mean they are packed tightly
	// together). "padding" is the amount of padding to leave between each
	// character (normally you want '1' for bitmaps you'll use as textures with
	// bilinear filtering).
	//
	// Returns 0 on failure, 1 on success.

	STBTT_DEF void stbtt_PackEnd( stbtt_pack_context* spc );
	// Cleans up the packing context and frees all memory.

// Encodes a point size as a negated value so it can be passed through the
// same 'font_size' parameter as a pixel height.
#define STBTT_POINT_SIZE(x)   (-(x))

	STBTT_DEF int stbtt_PackFontRange( stbtt_pack_context* spc, unsigned char* fontdata, int font_index, float font_size,
	                                   int first_unicode_char_in_range, int num_chars_in_range, stbtt_packedchar* chardata_for_range );

	// Creates character bitmaps from the font_index'th font found in fontdata (use
	// font_index=0 if you don't know what that is). It creates num_chars_in_range
	// bitmaps for characters with unicode values starting at first_unicode_char_in_range
	// and increasing. Data for how to render them is stored in chardata_for_range;
	// pass these to stbtt_GetPackedQuad to get back renderable quads.
	//
	// font_size is the full height of the character from ascender to descender,
	// as computed by stbtt_ScaleForPixelHeight. To use a point size as computed
	// by stbtt_ScaleForMappingEmToPixels, wrap the point size in STBTT_POINT_SIZE()
	// and pass that result as 'font_size':
	//       ...,                  20 , ... // font max minus min y is 20 pixels tall
	//       ..., STBTT_POINT_SIZE(20), ... // 'M' is 20 pixels tall

	// Describes one set of characters for stbtt_PackFontRanges: either a
	// contiguous run of codepoints or an explicit array of them.
	typedef struct
	{
		float font_size;
		// if non-zero, then the chars are continuous, and this is the first codepoint
		int first_unicode_codepoint_in_range;
		// if non-zero, then this is an array of unicode codepoints
		int* array_of_unicode_codepoints;
		int num_chars;
		// output
		stbtt_packedchar* chardata_for_range;
		// don't set these, they're used internally
		unsigned char h_oversample;
		unsigned char v_oversample;
	} stbtt_pack_range;

	STBTT_DEF int stbtt_PackFontRanges( stbtt_pack_context* spc, unsigned char* fontdata, int font_index, stbtt_pack_range* ranges, int num_ranges );
	// Creates character bitmaps from multiple ranges of characters stored in
	// ranges. This will usually create a better-packed bitmap than multiple
	// calls to stbtt_PackFontRange. Note that you can call this multiple
	// times within a single PackBegin/PackEnd.

	STBTT_DEF void stbtt_PackSetOversampling( stbtt_pack_context* spc, unsigned int h_oversample, unsigned int v_oversample );
	// Oversampling a font increases the quality by allowing higher-quality subpixel
	// positioning, and is especially valuable at smaller text sizes.
	//
	// This function sets the amount of oversampling for all following calls to
	// stbtt_PackFontRange(s) or stbtt_PackFontRangesGatherRects for a given
	// pack context. The default (no oversampling) is achieved by h_oversample=1
	// and v_oversample=1. The total number of pixels required is
	// h_oversample*v_oversample larger than the default; for example, 2x2
	// oversampling requires 4x the storage of 1x1. For best results, render
	// oversampled textures with bilinear filtering. Look at the readme in
	// stb/tests/oversample for information about oversampled fonts
	//
	// To use with PackFontRangesGather etc., you must set it before the
	// call to PackFontRangesGatherRects.

	// Produces the quad to draw for one character from packed character data
	// (stbtt_packedchar, as written by the stbtt_PackFontRange(s) functions).
	// NOTE(review): this looks like the packed-atlas counterpart of
	// stbtt_GetBakedQuad; align_to_integer is not described anywhere in this
	// header -- presumably it snaps the quad position to whole pixels; confirm
	// against the implementation.
	STBTT_DEF void stbtt_GetPackedQuad( stbtt_packedchar* chardata, int pw, int ph, // same data as above
	                                    int char_index, // character to display
	                                    float* xpos, float* ypos, // pointers to current position in screen pixel space
	                                    stbtt_aligned_quad* q, // output: quad to draw
	                                    int align_to_integer );

	STBTT_DEF int stbtt_PackFontRangesGatherRects( stbtt_pack_context* spc, stbtt_fontinfo* info, stbtt_pack_range* ranges, int num_ranges, stbrp_rect* rects );
	STBTT_DEF void stbtt_PackFontRangesPackRects( stbtt_pack_context* spc, stbrp_rect* rects, int num_rects );
	STBTT_DEF int stbtt_PackFontRangesRenderIntoRects( stbtt_pack_context* spc, stbtt_fontinfo* info, stbtt_pack_range* ranges, int num_ranges, stbrp_rect* rects );

	// Calling these functions in sequence is roughly equivalent to calling
	// stbtt_PackFontRanges(). If you need more control over the packing of multiple
	// fonts, or if you want to pack custom data into a font texture, take a look
	// at the source of stbtt_PackFontRanges() and create a custom version
	// using these functions, e.g. call GatherRects multiple times,
	// building up a single array of rects, then call PackRects once,
	// then call RenderIntoRects repeatedly. This may result in a
	// better packing than calling PackFontRanges multiple times
	// (or it may not).

	// this is an opaque structure that you shouldn't mess with which holds
	// all the context needed from PackBegin to PackEnd.
	struct stbtt_pack_context
	{
		// allocator userdata (the alloc_context parameter of stbtt_PackBegin,
		// presumably -- confirm against the implementation)
		void* user_allocator_context;
		void* pack_info;
		// target bitmap geometry
		int width;
		int height;
		int stride_in_bytes;
		// space left between packed characters
		int padding;
		// oversampling factors, see stbtt_PackSetOversampling
		unsigned int h_oversample;
		unsigned int v_oversample;
		// 1-channel bitmap that characters are packed into
		unsigned char* pixels;
		void* nodes;
	};

	//////////////////////////////////////////////////////////////////////////////
	//
	// FONT LOADING
	//
	//

	STBTT_DEF int stbtt_GetFontOffsetForIndex( const unsigned char* data, int index );

	// Each .ttf/.ttc file may have more than one font. Each font has a sequential
	// index number starting from 0. Call this function to get the font offset for
	// a given index; it returns -1 if the index is out of range. A regular .ttf
	// file will only define one font and it will always be at offset 0, so it will
	// return '0' for index 0, and -1 for all other indices. You can just skip
	// this step if you know it's that kind of font.

	// The following structure is defined publicly so you can declare one on
	// the stack or as a global or etc, but you should treat it as opaque.
	struct stbtt_fontinfo
	{
		void* userdata;
		// pointer to .ttf file
		unsigned char* data;
		// offset of start of font
		int fontstart;

		// number of glyphs, needed for range checking
		int numGlyphs;

		// table locations as offset from start of .ttf
		int loca;
		int head;
		int glyf;
		int hhea;
		int hmtx;
		int kern;
		// a cmap mapping for our chosen character encoding
		int index_map;
		// format needed to map from glyph index to glyph
		int indexToLocFormat;
	};

	STBTT_DEF int stbtt_InitFont( stbtt_fontinfo* info, const unsigned char* data, int offset );
	// Given an offset into the file that defines a font, this function builds
	// the necessary cached info for the rest of the system. You must allocate
	// the stbtt_fontinfo yourself, and stbtt_InitFont will fill it out. You don't
	// need to do anything special to free it, because the contents are pure
	// value data with no additional data structures. Returns 0 on failure.

	//////////////////////////////////////////////////////////////////////////////
	//
	// CHARACTER TO GLYPH-INDEX CONVERSION

	STBTT_DEF int stbtt_FindGlyphIndex( const stbtt_fontinfo* info, int unicode_codepoint );
	// If you're going to perform multiple operations on the same character
	// and you want a speed-up, call this function with the character you're
	// going to process, then use glyph-based functions instead of the
	// codepoint-based functions.

	//////////////////////////////////////////////////////////////////////////////
	//
	// CHARACTER PROPERTIES
	//

	STBTT_DEF float stbtt_ScaleForPixelHeight( const stbtt_fontinfo* info, float pixels );
	// computes a scale factor to produce a font whose "height" is 'pixels' tall.
	// Height is measured as the distance from the highest ascender to the lowest
	// descender; in other words, it's equivalent to calling stbtt_GetFontVMetrics
	// and computing:
	//       scale = pixels / (ascent - descent)
	// so if you prefer to measure height by the ascent only, use a similar calculation.

	STBTT_DEF float stbtt_ScaleForMappingEmToPixels( const stbtt_fontinfo* info, float pixels );
	// computes a scale factor to produce a font whose EM size is mapped to
	// 'pixels' tall. This is probably what traditional APIs compute, but
	// I'm not positive.

	STBTT_DEF void stbtt_GetFontVMetrics( const stbtt_fontinfo* info, int* ascent, int* descent, int* lineGap );
	// ascent is the coordinate above the baseline the font extends; descent
	// is the coordinate below the baseline the font extends (i.e. it is typically negative)
	// lineGap is the spacing between one row's descent and the next row's ascent...
	// so you should advance the vertical position by "*ascent - *descent + *lineGap"
	//   these are expressed in unscaled coordinates, so you must multiply by
	//   the scale factor for a given size

	STBTT_DEF void stbtt_GetFontBoundingBox( const stbtt_fontinfo* info, int* x0, int* y0, int* x1, int* y1 );
	// the bounding box around all possible characters

	STBTT_DEF void stbtt_GetCodepointHMetrics( const stbtt_fontinfo* info, int codepoint, int* advanceWidth, int* leftSideBearing );
	// leftSideBearing is the offset from the current horizontal position to the left edge of the character
	// advanceWidth is the offset from the current horizontal position to the next horizontal position
	//   these are expressed in unscaled coordinates

	STBTT_DEF int stbtt_GetCodepointKernAdvance( const stbtt_fontinfo* info, int ch1, int ch2 );
	// an additional amount to add to the 'advance' value between ch1 and ch2

	STBTT_DEF int stbtt_GetCodepointBox( const stbtt_fontinfo* info, int codepoint, int* x0, int* y0, int* x1, int* y1 );
	// Gets the bounding box of the visible part of the glyph, in unscaled coordinates

	STBTT_DEF void stbtt_GetGlyphHMetrics( const stbtt_fontinfo* info, int glyph_index, int* advanceWidth, int* leftSideBearing );
	STBTT_DEF int stbtt_GetGlyphKernAdvance( const stbtt_fontinfo* info, int glyph1, int glyph2 );
	STBTT_DEF int stbtt_GetGlyphBox( const stbtt_fontinfo* info, int glyph_index, int* x0, int* y0, int* x1, int* y1 );
	// as above, but takes one or more glyph indices for greater efficiency

	//////////////////////////////////////////////////////////////////////////////
	//
	// GLYPH SHAPES (you probably don't need these, but they have to go before
	// the bitmaps for C declaration-order reasons)
	//

#ifndef STBTT_vmove // you can predefine these to use different values (but why?)
	// Segment kinds of a glyph outline; values are consecutive starting at 1.
	// NOTE(review): presumably the values stored in stbtt_vertex.type -- confirm.
	enum
	{
		STBTT_vmove  = 1,
		STBTT_vline  = 2,
		STBTT_vcurve = 3
	};
#endif

#ifndef stbtt_vertex // you can predefine this to use different values
	// (we share this with other code at RAD)
#define stbtt_vertex_type short // can't use stbtt_int16 because that's not visible in the header file

	// One outline vertex, in unscaled coordinates.
	typedef struct
	{
		// endpoint of the segment
		stbtt_vertex_type x;
		stbtt_vertex_type y;
		// bezier control point, used by curve segments
		stbtt_vertex_type cx;
		stbtt_vertex_type cy;
		unsigned char type;
		unsigned char padding;
	} stbtt_vertex;
#endif

	STBTT_DEF int stbtt_IsGlyphEmpty( const stbtt_fontinfo* info, int glyph_index );
	// returns non-zero if nothing is drawn for this glyph

	STBTT_DEF int stbtt_GetCodepointShape( const stbtt_fontinfo* info, int unicode_codepoint, stbtt_vertex** vertices );
	STBTT_DEF int stbtt_GetGlyphShape( const stbtt_fontinfo* info, int glyph_index, stbtt_vertex** vertices );
	// returns # of vertices and fills *vertices with the pointer to them
	//   these are expressed in "unscaled" coordinates
	//
	// The shape is a series of contours. Each one starts with
	// a STBTT_vmove, then consists of a series of mixed
	// STBTT_vline and STBTT_vcurve segments. A vline
	// draws a line from previous endpoint to its x,y; a vcurve
	// draws a quadratic bezier from previous endpoint to
	// its x,y, using cx,cy as the bezier control point.

	STBTT_DEF void stbtt_FreeShape( const stbtt_fontinfo* info, stbtt_vertex* vertices );
	// frees the data allocated above

	//////////////////////////////////////////////////////////////////////////////
	//
	// BITMAP RENDERING
	//

	STBTT_DEF void stbtt_FreeBitmap( unsigned char* bitmap, void* userdata );
	// frees the bitmap allocated below

	STBTT_DEF unsigned char* stbtt_GetCodepointBitmap( const stbtt_fontinfo* info, float scale_x, float scale_y, int codepoint, int* width, int* height, int* xoff, int* yoff );
	// allocates a large-enough single-channel 8bpp bitmap and renders the
	// specified character/glyph at the specified scale into it, with
	// antialiasing. 0 is no coverage (transparent), 255 is fully covered (opaque).
	// *width & *height are filled out with the width & height of the bitmap,
	// which is stored left-to-right, top-to-bottom.
	//
	// xoff/yoff are the offset in pixel space from the glyph origin to the top-left of the bitmap

	STBTT_DEF unsigned char* stbtt_GetCodepointBitmapSubpixel( const stbtt_fontinfo* info, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint, int* width, int* height, int* xoff, int* yoff );
	// the same as stbtt_GetCodepointBitmap, but you can specify a subpixel
	// shift for the character

	STBTT_DEF void stbtt_MakeCodepointBitmap( const stbtt_fontinfo* info, unsigned char* output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int codepoint );
	// the same as stbtt_GetCodepointBitmap, but you pass in storage for the bitmap
	// in the form of 'output', with row spacing of 'out_stride' bytes. the bitmap
	// is clipped to out_w/out_h bytes. Call stbtt_GetCodepointBitmapBox to get the
	// width and height and positioning info for it first.

	STBTT_DEF void stbtt_MakeCodepointBitmapSubpixel( const stbtt_fontinfo* info, unsigned char* output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint );
	// same as stbtt_MakeCodepointBitmap, but you can specify a subpixel
	// shift for the character

	STBTT_DEF void stbtt_GetCodepointBitmapBox( const stbtt_fontinfo* font, int codepoint, float scale_x, float scale_y, int* ix0, int* iy0, int* ix1, int* iy1 );
	// get the bbox of the bitmap centered around the glyph origin; so the
	// bitmap width is ix1-ix0, height is iy1-iy0, and location to place
	// the bitmap top left is (leftSideBearing*scale,iy0).
	// (Note that the bitmap uses y-increases-down, but the shape uses
	// y-increases-up, so CodepointBitmapBox and CodepointBox are inverted.)

	STBTT_DEF void stbtt_GetCodepointBitmapBoxSubpixel( const stbtt_fontinfo* font, int codepoint, float scale_x, float scale_y, float shift_x, float shift_y, int* ix0, int* iy0, int* ix1, int* iy1 );
	// same as stbtt_GetCodepointBitmapBox, but you can specify a subpixel
	// shift for the character

	// the following functions are equivalent to the above functions, but operate
	// on glyph indices instead of Unicode codepoints (for efficiency)
	STBTT_DEF unsigned char* stbtt_GetGlyphBitmap( const stbtt_fontinfo* info, float scale_x, float scale_y, int glyph, int* width, int* height, int* xoff, int* yoff );
	STBTT_DEF unsigned char* stbtt_GetGlyphBitmapSubpixel( const stbtt_fontinfo* info, float scale_x, float scale_y, float shift_x, float shift_y, int glyph, int* width, int* height, int* xoff, int* yoff );
	STBTT_DEF void stbtt_MakeGlyphBitmap( const stbtt_fontinfo* info, unsigned char* output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int glyph );
	STBTT_DEF void stbtt_MakeGlyphBitmapSubpixel( const stbtt_fontinfo* info, unsigned char* output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int glyph );
	STBTT_DEF void stbtt_GetGlyphBitmapBox( const stbtt_fontinfo* font, int glyph, float scale_x, float scale_y, int* ix0, int* iy0, int* ix1, int* iy1 );
	STBTT_DEF void stbtt_GetGlyphBitmapBoxSubpixel( const stbtt_fontinfo* font, int glyph, float scale_x, float scale_y, float shift_x, float shift_y, int* ix0, int* iy0, int* ix1, int* iy1 );

	// @TODO: don't expose this structure
	// destination image description handed to stbtt_Rasterize() (a 1-channel bitmap)
	typedef struct
	{
		// w,h: bitmap dimensions; stride: presumably the distance in bytes between
		// consecutive rows (cf. 'out_stride' in stbtt_MakeCodepointBitmap) -- TODO confirm
		int w, h, stride;
		// storage for the pixels
		unsigned char* pixels;
	} stbtt__bitmap;

	// rasterize a shape with quadratic beziers into a bitmap
	STBTT_DEF void stbtt_Rasterize( stbtt__bitmap* result, // 1-channel bitmap to draw into
	                                float flatness_in_pixels, // allowable error of curve in pixels
	                                stbtt_vertex* vertices, // array of vertices defining shape
	                                int num_verts, // number of vertices in above array
	                                float scale_x, float scale_y, // scale applied to input vertices
	                                float shift_x, float shift_y, // translation applied to input vertices
	                                int x_off, int y_off, // another translation applied to input
	                                int invert, // if non-zero, vertically flip shape
	                                void* userdata ); // context passed to STBTT_malloc

	//////////////////////////////////////////////////////////////////////////////
	//
	// Finding the right font...
	//
	// You should really just solve this offline, keep your own tables
	// of what font is what, and don't try to get it out of the .ttf file.
	// That's because getting it out of the .ttf file is really hard, because
	// the names in the file can appear in many possible encodings, in many
	// possible languages, and e.g. if you need a case-insensitive comparison,
	// the details of that depend on the encoding & language in a complex way
	// (actually underspecified in truetype, but also gigantic).
	//
	// But you can use the provided functions in two possible ways:
	//     stbtt_FindMatchingFont() will use *case-sensitive* comparisons on
	//             unicode-encoded names to try to find the font you want;
	//             you can run this before calling stbtt_InitFont()
	//
	//     stbtt_GetFontNameString() lets you get any of the various strings
	//             from the file yourself and do your own comparisons on them.
	//             You have to have called stbtt_InitFont() first.

	STBTT_DEF int stbtt_FindMatchingFont( const unsigned char* fontdata, const char* name, int flags );
	// returns the offset (not index) of the font that matches, or -1 if none
	//   if you use STBTT_MACSTYLE_DONTCARE, use a font name like "Arial Bold".
	//   if you use any other flag, use a font name like "Arial"; this checks
	//     the 'macStyle' header field; i don't know if fonts set this consistently
// values for the 'flags' argument of stbtt_FindMatchingFont(); the non-zero
// ones are distinct bits
#define STBTT_MACSTYLE_DONTCARE     0
#define STBTT_MACSTYLE_BOLD         1
#define STBTT_MACSTYLE_ITALIC       2
#define STBTT_MACSTYLE_UNDERSCORE   4
#define STBTT_MACSTYLE_NONE         8   // <= not same as 0, this makes us check the bitfield is 0

	STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian( const char* s1, int len1, const char* s2, int len2 );
	// returns 1/0 whether the first string interpreted as utf8 is identical to
	// the second string interpreted as big-endian utf16... useful for strings from next func

	STBTT_DEF const char* stbtt_GetFontNameString( const stbtt_fontinfo* font, int* length, int platformID, int encodingID, int languageID, int nameID );

	// returns the string (which may be big-endian double byte, e.g. for unicode)
	// and puts the length in bytes in *length.
	//
	// some of the values for the IDs are below; for more see the truetype spec:
	//     http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6name.html
	//     http://www.microsoft.com/typography/otspec/name.htm

	// platformID values for stbtt_GetFontNameString(); also compared against
	// the cmap encoding records in stbtt_InitFont()
	enum
	{
		STBTT_PLATFORM_ID_UNICODE   = 0,
		STBTT_PLATFORM_ID_MAC       = 1,
		STBTT_PLATFORM_ID_ISO       = 2,
		STBTT_PLATFORM_ID_MICROSOFT = 3
	};

	// encodingID values to use together with STBTT_PLATFORM_ID_UNICODE
	enum
	{
		STBTT_UNICODE_EID_UNICODE_1_0      = 0,
		STBTT_UNICODE_EID_UNICODE_1_1      = 1,
		STBTT_UNICODE_EID_ISO_10646        = 2,
		STBTT_UNICODE_EID_UNICODE_2_0_BMP  = 3,
		STBTT_UNICODE_EID_UNICODE_2_0_FULL = 4
	};

	// encodingID values to use together with STBTT_PLATFORM_ID_MICROSOFT
	enum
	{
		STBTT_MS_EID_SYMBOL       = 0,
		STBTT_MS_EID_UNICODE_BMP  = 1,
		STBTT_MS_EID_SHIFTJIS     = 2,
		STBTT_MS_EID_UNICODE_FULL = 10
	};

	// encodingID values to use together with STBTT_PLATFORM_ID_MAC;
	// same as Script Manager codes. Listed in numeric order.
	enum
	{
		STBTT_MAC_EID_ROMAN        = 0,
		STBTT_MAC_EID_JAPANESE     = 1,
		STBTT_MAC_EID_CHINESE_TRAD = 2,
		STBTT_MAC_EID_KOREAN       = 3,
		STBTT_MAC_EID_ARABIC       = 4,
		STBTT_MAC_EID_HEBREW       = 5,
		STBTT_MAC_EID_GREEK        = 6,
		STBTT_MAC_EID_RUSSIAN      = 7
	};

	enum
	{ // languageID for STBTT_PLATFORM_ID_MICROSOFT; same as LCID...
		// problematic because there are e.g. 16 english LCIDs and 16 arabic LCIDs
		STBTT_MS_LANG_ENGLISH =0x0409,
		STBTT_MS_LANG_ITALIAN =0x0410,
		STBTT_MS_LANG_CHINESE =0x0804,
		STBTT_MS_LANG_JAPANESE =0x0411,
		STBTT_MS_LANG_DUTCH =0x0413,
		STBTT_MS_LANG_KOREAN =0x0412,
		STBTT_MS_LANG_FRENCH =0x040c,
		STBTT_MS_LANG_RUSSIAN =0x0419,
		STBTT_MS_LANG_GERMAN =0x0407,
		// Spanish (traditional sort, Spain). This used to be 0x0409, which is
		// the English (US) LCID, so asking for Spanish names returned English ones.
		STBTT_MS_LANG_SPANISH =0x040a,
		STBTT_MS_LANG_HEBREW =0x040d,
		STBTT_MS_LANG_SWEDISH =0x041D
	};

	// languageID values to use together with STBTT_PLATFORM_ID_MAC,
	// listed in numeric order
	enum
	{
		STBTT_MAC_LANG_ENGLISH            = 0,
		STBTT_MAC_LANG_FRENCH             = 1,
		STBTT_MAC_LANG_GERMAN             = 2,
		STBTT_MAC_LANG_ITALIAN            = 3,
		STBTT_MAC_LANG_DUTCH              = 4,
		STBTT_MAC_LANG_SWEDISH            = 5,
		STBTT_MAC_LANG_SPANISH            = 6,
		STBTT_MAC_LANG_HEBREW             = 10,
		STBTT_MAC_LANG_JAPANESE           = 11,
		STBTT_MAC_LANG_ARABIC             = 12,
		STBTT_MAC_LANG_CHINESE_TRAD       = 19,
		STBTT_MAC_LANG_KOREAN             = 23,
		STBTT_MAC_LANG_RUSSIAN            = 32,
		STBTT_MAC_LANG_CHINESE_SIMPLIFIED = 33
	};

#ifdef __cplusplus
}
#endif

#endif // __STB_INCLUDE_STB_TRUETYPE_H__

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
////
////   IMPLEMENTATION
////
////

#ifdef STB_TRUETYPE_IMPLEMENTATION

// default for STBTT_MAX_OVERSAMPLE when it has not been predefined
#ifndef STBTT_MAX_OVERSAMPLE
#define STBTT_MAX_OVERSAMPLE   8
#endif

#if STBTT_MAX_OVERSAMPLE > 255
#error "STBTT_MAX_OVERSAMPLE cannot be > 255"
#endif

// compile-time check: the array size becomes -1 (a compile error) unless
// STBTT_MAX_OVERSAMPLE & (STBTT_MAX_OVERSAMPLE-1) is zero, i.e. the value is a power of two
typedef int stbtt__test_oversample_pow2[( STBTT_MAX_OVERSAMPLE & ( STBTT_MAX_OVERSAMPLE - 1 ) ) == 0 ? 1 : -1];

// default for STBTT_RASTERIZER_VERSION when it has not been predefined
#ifndef STBTT_RASTERIZER_VERSION
#define STBTT_RASTERIZER_VERSION 2
#endif

//////////////////////////////////////////////////////////////////////////
//
// accessors to parse data from file
//

// on platforms that don't allow misaligned reads, if we want to allow
// truetype fonts that aren't padded to alignment, define ALLOW_UNALIGNED_TRUETYPE

// single-byte reads, unsigned and signed
#define ttBYTE(p)     (* (stbtt_uint8 *) (p))
#define ttCHAR(p)     (* (stbtt_int8 *) (p))
// a 'Fixed' value is read as a plain 32-bit signed integer
#define ttFixed(p)    ttLONG(p)

// when the host is declared big-endian and unaligned support was not requested,
// multi-byte values are read directly through a cast pointer; otherwise the
// byte-by-byte functions below are used
#if defined(STB_TRUETYPE_BIGENDIAN) && !defined(ALLOW_UNALIGNED_TRUETYPE)

   #define ttUSHORT(p)   (* (stbtt_uint16 *) (p))
   #define ttSHORT(p)    (* (stbtt_int16 *) (p))
   #define ttULONG(p)    (* (stbtt_uint32 *) (p))
   #define ttLONG(p)     (* (stbtt_int32 *) (p))
#else

static stbtt_uint16 ttUSHORT( const stbtt_uint8* p )
{
	return p[ 0 ] * 256 + p[ 1 ];
}

static stbtt_int16 ttSHORT( const stbtt_uint8* p )
{
	return p[ 0 ] * 256 + p[ 1 ];
}

static stbtt_uint32 ttULONG( const stbtt_uint8* p )
{
	return ( p[ 0 ] << 24 ) + ( p[ 1 ] << 16 ) + ( p[ 2 ] << 8 ) + p[ 3 ];
}

static stbtt_int32 ttLONG( const stbtt_uint8* p )
{
	return ( p[ 0 ] << 24 ) + ( p[ 1 ] << 16 ) + ( p[ 2 ] << 8 ) + p[ 3 ];
}

#endif

// stbtt_tag4: true when the four bytes at p equal c0,c1,c2,c3 in that order
// stbtt_tag:  same, taking the four expected bytes from the first four chars of str
//             (str is expanded four times and is not parenthesized)
#define stbtt_tag4(p,c0,c1,c2,c3) ((p)[0] == (c0) && (p)[1] == (c1) && (p)[2] == (c2) && (p)[3] == (c3))
#define stbtt_tag(p,str)           stbtt_tag4(p,str[0],str[1],str[2],str[3])

// Returns 1 when the four bytes at 'font' are one of the version tags
// accepted as the start of a single font, 0 otherwise.
static int stbtt__isfont( const stbtt_uint8* font )
{
	// check the version number
	return ( stbtt_tag4(font, '1',0,0,0)   // TrueType 1
	      || stbtt_tag(font, "typ1")       // TrueType with type 1 font -- we don't support this!
	      || stbtt_tag(font, "OTTO")       // OpenType with CFF
	      || stbtt_tag4(font, 0,1,0,0) )   // OpenType 1.0
	       ? 1 : 0;
}

// @OPTIMIZE: binary search
// Linear scan of the table directory of the font starting at 'fontstart':
// returns the 32-bit value stored 8 bytes into the first 16-byte record
// whose leading four bytes equal 'tag', or 0 when no record matches.
// @OPTIMIZE: binary search
static stbtt_uint32 stbtt__find_table( stbtt_uint8* data, stbtt_uint32 fontstart, const char* tag )
{
	stbtt_int32 remaining = ttUSHORT( data + fontstart + 4 ); // record count
	stbtt_uint32 record = fontstart + 12; // offset of the first record

	while( remaining > 0 )
	{
		if( stbtt_tag(data+record+0, tag) )
			return ttULONG( data + record + 8 );
		record += 16;
		--remaining;
	}
	return 0;
}

// Returns the byte offset of font number 'index' inside 'font_collection',
// or -1 when the index is not valid or the data is neither a recognised
// single font nor a version 1/2 'ttcf' collection.
STBTT_DEF int stbtt_GetFontOffsetForIndex( const unsigned char* font_collection, int index )
{
	// if it's just a font, there's only one valid index
	if( stbtt__isfont( font_collection ) )
		return index == 0 ? 0 : -1;

	// check if it's a TTC
	if( stbtt_tag(font_collection, "ttcf") )
	{
		// version 1 or 2?
		if( ttULONG( font_collection + 4 ) == 0x00010000 || ttULONG( font_collection + 4 ) == 0x00020000 )
		{
			stbtt_int32 n = ttLONG( font_collection + 8 );
			// a negative index must be rejected as well: it would read an
			// "offset" from memory located before the offset table
			if( index < 0 || index >= n )
				return -1;
			return ttULONG( font_collection + 12 + index * 4 );
		}
	}
	return -1;
}

// Fills 'info' for the font found at offset 'fontstart' inside 'data2'.
// Records the locations of the tables used later, the glyph count, the
// cmap subtable to use for codepoint lookups and the loca format.
// Returns 1 on success, 0 when a required table is missing or no cmap
// encoding we understand is present.
STBTT_DEF int stbtt_InitFont( stbtt_fontinfo* info, const unsigned char* data2, int fontstart )
{
	stbtt_uint8* bytes = ( stbtt_uint8 * ) data2;
	stbtt_uint32 cmap, maxp;
	stbtt_int32 rec, rec_count;

	info->data = bytes;
	info->fontstart = fontstart;

	// every table except 'kern' is required
	cmap = stbtt__find_table( bytes, fontstart, "cmap" );
	info->loca = stbtt__find_table( bytes, fontstart, "loca" );
	info->head = stbtt__find_table( bytes, fontstart, "head" );
	info->glyf = stbtt__find_table( bytes, fontstart, "glyf" );
	info->hhea = stbtt__find_table( bytes, fontstart, "hhea" );
	info->hmtx = stbtt__find_table( bytes, fontstart, "hmtx" );
	info->kern = stbtt__find_table( bytes, fontstart, "kern" );
	if( !( cmap && info->loca && info->head && info->glyf && info->hhea && info->hmtx ) )
		return 0;

	// glyph count comes from 'maxp' when present; otherwise assume the maximum
	maxp = stbtt__find_table( bytes, fontstart, "maxp" );
	info->numGlyphs = maxp ? ttUSHORT( bytes + maxp + 4 ) : 0xffff;

	// pick a cmap encoding table we understand right away, so lookups
	// do not have to search for one later (todo: could make this installable).
	// Every record is visited, so the last acceptable one wins.
	rec_count = ttUSHORT( bytes + cmap + 2 );
	info->index_map = 0;
	for( rec = 0; rec < rec_count; ++rec )
	{
		stbtt_uint32 encoding_record = cmap + 4 + 8 * rec;
		stbtt_uint16 platform = ttUSHORT( bytes + encoding_record );
		int usable = 0;

		if( platform == STBTT_PLATFORM_ID_MICROSOFT )
		{
			// only the MS/Unicode encodings are understood
			stbtt_uint16 encoding = ttUSHORT( bytes + encoding_record + 2 );
			usable = ( encoding == STBTT_MS_EID_UNICODE_BMP || encoding == STBTT_MS_EID_UNICODE_FULL );
		}
		else if( platform == STBTT_PLATFORM_ID_UNICODE )
		{
			// Mac/iOS has these
			// all the encodingIDs are unicode, so we don't bother to check it
			usable = 1;
		}

		if( usable )
			info->index_map = cmap + ttULONG( bytes + encoding_record + 4 );
	}
	if( info->index_map == 0 )
		return 0;

	info->indexToLocFormat = ttUSHORT( bytes + info->head + 50 );
	return 1;
}

// Maps 'unicode_codepoint' to a glyph index using the cmap subtable chosen
// by stbtt_InitFont() (info->index_map). Handles subtable formats 0, 4, 6,
// 12 and 13; format 2 and any other format assert and return 0.
// Returns 0 when the codepoint is not covered by the subtable.
STBTT_DEF int stbtt_FindGlyphIndex( const stbtt_fontinfo* info, int unicode_codepoint )
{
	stbtt_uint8* data = info->data;
	stbtt_uint32 index_map = info->index_map;

	stbtt_uint16 format = ttUSHORT( data + index_map + 0 );
	if( format == 0 )
	{ // apple byte encoding
		stbtt_int32 bytes = ttUSHORT( data + index_map + 2 );
		// the lower bound matters: a negative codepoint would otherwise index
		// memory located before the glyph array (the other formats already
		// reject negative values through their unsigned/range comparisons)
		if( unicode_codepoint >= 0 && unicode_codepoint < bytes - 6 )
			return ttBYTE(data + index_map + 6 + unicode_codepoint);
		return 0;
	}
	else if( format == 6 )
	{
		// one contiguous range [first, first+count) with a 16-bit glyph index per entry
		stbtt_uint32 first = ttUSHORT( data + index_map + 6 );
		stbtt_uint32 count = ttUSHORT( data + index_map + 8 );
		if( ( stbtt_uint32 ) unicode_codepoint >= first && ( stbtt_uint32 ) unicode_codepoint < first + count )
			return ttUSHORT( data + index_map + 10 + ( unicode_codepoint - first ) * 2 );
		return 0;
	}
	else if( format == 2 )
	{
		STBTT_assert(0); // @TODO: high-byte mapping for japanese/chinese/korean
		return 0;
	}
	else if( format == 4 )
	{ // standard mapping for windows fonts: binary search collection of ranges
		// the stored values are halved here, turning byte counts into entry counts
		stbtt_uint16 segcount = ttUSHORT( data + index_map + 6 ) >> 1;
		stbtt_uint16 searchRange = ttUSHORT( data + index_map + 8 ) >> 1;
		stbtt_uint16 entrySelector = ttUSHORT( data + index_map + 10 );
		stbtt_uint16 rangeShift = ttUSHORT( data + index_map + 12 ) >> 1;

		// do a binary search of the segments
		stbtt_uint32 endCount = index_map + 14;
		stbtt_uint32 search = endCount;

		// this format only stores 16-bit codepoints
		if( unicode_codepoint > 0xffff )
			return 0;

		// they lie from endCount .. endCount + segCount
		// but searchRange is the nearest power of two, so...
		if( unicode_codepoint >= ttUSHORT( data + search + rangeShift * 2 ) )
			search += rangeShift * 2;

		// now decrement to bias correctly to find smallest
		search -= 2;
		while( entrySelector )
		{
			stbtt_uint16 end;
			searchRange >>= 1;
			end = ttUSHORT( data + search + searchRange * 2 );
			if( unicode_codepoint > end )
				search += searchRange * 2;
			--entrySelector;
		}
		search += 2;

		{
			stbtt_uint16 offset, start;
			// index of the segment the search ended on
			stbtt_uint16 item = ( stbtt_uint16 ) ( ( search - endCount ) >> 1 );

			STBTT_assert(unicode_codepoint <= ttUSHORT(data + endCount + 2*item));
			// the start array follows the end array plus one extra 16-bit word
			start = ttUSHORT( data + index_map + 14 + segcount * 2 + 2 + 2 * item );
			if( unicode_codepoint < start )
				return 0;

			offset = ttUSHORT( data + index_map + 14 + segcount * 6 + 2 + 2 * item );
			// offset 0: the glyph is the codepoint plus the segment's signed delta, truncated to 16 bits
			if( offset == 0 )
				return ( stbtt_uint16 ) ( unicode_codepoint + ttSHORT( data + index_map + 14 + segcount * 4 + 2 + 2 * item ) );

			// otherwise 'offset' is relative to the location of the offset entry itself
			return ttUSHORT( data + offset + ( unicode_codepoint - start ) * 2 + index_map + 14 + segcount * 6 + 2 + 2 * item );
		}
	}
	else if( format == 12 || format == 13 )
	{
		// groups of 12 bytes each: start char, end char, start glyph
		stbtt_uint32 ngroups = ttULONG( data + index_map + 12 );
		stbtt_int32 low, high;
		low = 0;
		high = ( stbtt_int32 )ngroups;
		// Binary search the right group.
		while( low < high )
		{
			stbtt_int32 mid = low + ( ( high - low ) >> 1 ); // rounds down, so low <= mid < high
			stbtt_uint32 start_char = ttULONG( data + index_map + 16 + mid * 12 );
			stbtt_uint32 end_char = ttULONG( data + index_map + 16 + mid * 12 + 4 );
			if( ( stbtt_uint32 ) unicode_codepoint < start_char )
				high = mid;
			else if( ( stbtt_uint32 ) unicode_codepoint > end_char )
				low = mid + 1;
			else
			{
				stbtt_uint32 start_glyph = ttULONG( data + index_map + 16 + mid * 12 + 8 );
				if( format == 12 )
					return start_glyph + unicode_codepoint - start_char;
				else // format == 13
					return start_glyph;
			}
		}
		return 0; // not found
	}
	// @TODO
	STBTT_assert(0);
	return 0;
}

// Codepoint flavour of stbtt_GetGlyphShape(): looks the glyph up first.
STBTT_DEF int stbtt_GetCodepointShape( const stbtt_fontinfo* info, int unicode_codepoint, stbtt_vertex** vertices )
{
	int glyph = stbtt_FindGlyphIndex( info, unicode_codepoint );
	return stbtt_GetGlyphShape( info, glyph, vertices );
}

// Writes one vertex record: its type, its endpoint (x,y) and its control
// point (cx,cy). Coordinates are narrowed to 16 bits.
static void stbtt_setvertex( stbtt_vertex* v, stbtt_uint8 type, stbtt_int32 x, stbtt_int32 y, stbtt_int32 cx, stbtt_int32 cy )
{
	v->x  = ( stbtt_int16 ) x;
	v->y  = ( stbtt_int16 ) y;
	v->cx = ( stbtt_int16 ) cx;
	v->cy = ( stbtt_int16 ) cy;
	v->type = type;
}

// Returns the offset (from the start of the font data) of the 'glyf' entry
// for 'glyph_index', computed from two consecutive 'loca' entries.
// Returns -1 when the index is out of range, the loca format is unknown,
// or the glyph has zero length (nothing to draw).
static int stbtt__GetGlyfOffset( const stbtt_fontinfo* info, int glyph_index )
{
	int g1, g2;

	// negative indices must be rejected too: they would index before 'loca'
	if( glyph_index < 0 || glyph_index >= info->numGlyphs )
		return -1; // glyph index out of range
	if( info->indexToLocFormat >= 2 )
		return -1; // unknown index->glyph map format

	if( info->indexToLocFormat == 0 )
	{
		// format 0: 16-bit entries, stored value is doubled to get the byte offset
		g1 = info->glyf + ttUSHORT( info->data + info->loca + glyph_index * 2 ) * 2;
		g2 = info->glyf + ttUSHORT( info->data + info->loca + glyph_index * 2 + 2 ) * 2;
	}
	else
	{
		// format 1: 32-bit entries holding the byte offset directly
		g1 = info->glyf + ttULONG( info->data + info->loca + glyph_index * 4 );
		g2 = info->glyf + ttULONG( info->data + info->loca + glyph_index * 4 + 4 );
	}

	return g1 == g2 ? -1 : g1; // if length is 0, return -1
}

STBTT_DEF int stbtt_GetGlyphBox( const stbtt_fontinfo* info, int glyph_index, int* x0, int* y0, int* x1, int* y1 )
{
	int g = stbtt__GetGlyfOffset( info, glyph_index );
	if( g < 0 )
		return 0;

	if( x0 )
		*x0 = ttSHORT( info->data + g + 2 );
	if( y0 )
		*y0 = ttSHORT( info->data + g + 4 );
	if( x1 )
		*x1 = ttSHORT( info->data + g + 6 );
	if( y1 )
		*y1 = ttSHORT( info->data + g + 8 );
	return 1;
}

// Codepoint flavour of stbtt_GetGlyphBox(): looks the glyph up first.
STBTT_DEF int stbtt_GetCodepointBox( const stbtt_fontinfo* info, int codepoint, int* x0, int* y0, int* x1, int* y1 )
{
	int glyph = stbtt_FindGlyphIndex( info, codepoint );
	return stbtt_GetGlyphBox( info, glyph, x0, y0, x1, y1 );
}

// Returns non-zero when nothing is drawn for the glyph: either it has no
// 'glyf' entry, or its contour count (first 16-bit value of the entry) is 0.
STBTT_DEF int stbtt_IsGlyphEmpty( const stbtt_fontinfo* info, int glyph_index )
{
	int offset = stbtt__GetGlyfOffset( info, glyph_index );
	if( offset >= 0 )
	{
		stbtt_int16 contours = ttSHORT( info->data + offset );
		return contours == 0;
	}
	return 1;
}

// Appends the vertex (or two vertices) that bring the current contour back
// to its start point (sx,sy) and returns the updated vertex count.
//   was_off   - the last point seen was an off-curve control point (cx,cy)
//   start_off - the contour began with an off-curve control point (scx,scy)
static int stbtt__close_shape( stbtt_vertex* vertices, int num_vertices, int was_off, int start_off,
                               stbtt_int32 sx, stbtt_int32 sy, stbtt_int32 scx, stbtt_int32 scy, stbtt_int32 cx, stbtt_int32 cy )
{
	stbtt_vertex* out = vertices + num_vertices;

	if( !start_off )
	{
		// contour started on the curve: a single segment closes it
		if( was_off )
			stbtt_setvertex( out++, STBTT_vcurve, sx, sy, cx, cy );
		else
			stbtt_setvertex( out++, STBTT_vline, sx, sy, 0, 0 );
	}
	else
	{
		// two control points in a row: go through their midpoint first
		if( was_off )
			stbtt_setvertex( out++, STBTT_vcurve, ( cx + scx ) >> 1, ( cy + scy ) >> 1, cx, cy );
		// then curve to the start using the saved starting control point
		stbtt_setvertex( out++, STBTT_vcurve, sx, sy, scx, scy );
	}
	return ( int ) ( out - vertices );
}

// Builds the outline of 'glyph_index' as an array of stbtt_vertex.
//   *pvertices receives an array allocated with STBTT_malloc (or NULL when
//   there is nothing to return); the return value is the number of vertices.
// A positive contour count is decoded as a simple glyph; a contour count of
// -1 is a compound glyph whose components are fetched recursively,
// transformed and concatenated. Returns 0 on allocation failure.
STBTT_DEF int stbtt_GetGlyphShape( const stbtt_fontinfo* info, int glyph_index, stbtt_vertex** pvertices )
{
	stbtt_int16 numberOfContours;
	stbtt_uint8* endPtsOfContours;
	stbtt_uint8* data = info->data;
	stbtt_vertex* vertices = 0;
	int num_vertices = 0;
	int g = stbtt__GetGlyfOffset( info, glyph_index );

	*pvertices = NULL;

	// no 'glyf' entry for this glyph: nothing to draw
	if( g < 0 )
		return 0;

	numberOfContours = ttSHORT( data + g );

	if( numberOfContours > 0 )
	{
		stbtt_uint8 flags = 0, flagcount;
		stbtt_int32 ins, i, j = 0, m, n, next_move, was_off = 0, off, start_off = 0;
		stbtt_int32 x, y, cx, cy, sx, sy, scx, scy;
		stbtt_uint8* points;
		// the contour end-point array starts 10 bytes into the glyph entry
		endPtsOfContours = ( data + g + 10 );
		// 'ins' bytes following the 16-bit count are skipped to reach the point data
		ins = ttUSHORT( data + g + 10 + numberOfContours * 2 );
		points = data + g + 10 + numberOfContours * 2 + 2 + ins;

		// number of points = last contour end index + 1
		n = 1 + ttUSHORT( endPtsOfContours + numberOfContours * 2 - 2 );

		m = n + 2 * numberOfContours; // a loose bound on how many vertices we might need
		vertices = ( stbtt_vertex * ) STBTT_malloc(m * sizeof(vertices[0]), info->userdata);
		if( vertices == 0 )
			return 0;

		next_move = 0;
		flagcount = 0;

		// in first pass, we load uninterpreted data into the allocated array
		// above, shifted to the end of the array so we won't overwrite it when
		// we create our final data starting from the front

		off = m - n; // starting offset for uninterpreted data, regardless of how m ends up being calculated

		// first load flags

		for( i = 0; i < n; ++i )
		{
			if( flagcount == 0 )
			{
				flags = *points++;
				// bit 8: the next byte says how many more points reuse this flag byte
				if( flags & 8 )
					flagcount = *points++;
			}
			else
				--flagcount;
			vertices[ off + i ].type = flags;
		}

		// now load x coordinates
		x = 0;
		for( i = 0; i < n; ++i )
		{
			flags = vertices[ off + i ].type;
			if( flags & 2 )
			{
				// one-byte magnitude; bit 16 picks the sign (set: add, clear: subtract)
				stbtt_int16 dx = *points++;
				x += ( flags & 16 ) ? dx : -dx;
			}
			else
			{
				// bit 16 set: x is unchanged; clear: a signed 16-bit big-endian delta follows
				if( !( flags & 16 ) )
				{
					x = x + ( stbtt_int16 ) ( points[ 0 ] * 256 + points[ 1 ] );
					points += 2;
				}
			}
			vertices[ off + i ].x = ( stbtt_int16 ) x;
		}

		// now load y coordinates
		y = 0;
		for( i = 0; i < n; ++i )
		{
			flags = vertices[ off + i ].type;
			if( flags & 4 )
			{
				// one-byte magnitude; bit 32 picks the sign (set: add, clear: subtract)
				stbtt_int16 dy = *points++;
				y += ( flags & 32 ) ? dy : -dy;
			}
			else
			{
				// bit 32 set: y is unchanged; clear: a signed 16-bit big-endian delta follows
				if( !( flags & 32 ) )
				{
					y = y + ( stbtt_int16 ) ( points[ 0 ] * 256 + points[ 1 ] );
					points += 2;
				}
			}
			vertices[ off + i ].y = ( stbtt_int16 ) y;
		}

		// now convert them to our format
		num_vertices = 0;
		sx = sy = cx = cy = scx = scy = 0;
		for( i = 0; i < n; ++i )
		{
			flags = vertices[ off + i ].type;
			x = ( stbtt_int16 ) vertices[ off + i ].x;
			y = ( stbtt_int16 ) vertices[ off + i ].y;

			// next_move is the index of the first point of the next contour
			if( next_move == i )
			{
				// close the previous contour before starting a new one
				if( i != 0 )
					num_vertices = stbtt__close_shape( vertices, num_vertices, was_off, start_off, sx, sy, scx, scy, cx, cy );

				// now start the new one
				// bit 1 of the flags marks an on-curve point
				start_off = !( flags & 1 );
				if( start_off )
				{
					// if we start off with an off-curve point, then when we need to find a point on the curve
					// where we can start, and we need to save some state for when we wraparound.
					scx = x;
					scy = y;
					// NOTE(review): this reads point i+1 without checking i+1 < n
					if( !( vertices[ off + i + 1 ].type & 1 ) )
					{
						// next point is also a curve point, so interpolate an on-point curve
						sx = ( x + ( stbtt_int32 ) vertices[ off + i + 1 ].x ) >> 1;
						sy = ( y + ( stbtt_int32 ) vertices[ off + i + 1 ].y ) >> 1;
					}
					else
					{
						// otherwise just use the next point as our start point
						sx = ( stbtt_int32 ) vertices[ off + i + 1 ].x;
						sy = ( stbtt_int32 ) vertices[ off + i + 1 ].y;
						++i; // we're using point i+1 as the starting point, so skip it
					}
				}
				else
				{
					sx = x;
					sy = y;
				}
				stbtt_setvertex( &vertices[ num_vertices++ ], STBTT_vmove, sx, sy, 0, 0 );
				was_off = 0;
				next_move = 1 + ttUSHORT( endPtsOfContours + j * 2 );
				++j;
			}
			else
			{
				if( !( flags & 1 ) )
				{ // if it's a curve
					if( was_off ) // two off-curve control points in a row means interpolate an on-curve midpoint
						stbtt_setvertex( &vertices[ num_vertices++ ], STBTT_vcurve, ( cx + x ) >> 1, ( cy + y ) >> 1, cx, cy );
					// remember this control point until the next on-curve point arrives
					cx = x;
					cy = y;
					was_off = 1;
				}
				else
				{
					// on-curve point: finish the pending curve, or draw a straight line
					if( was_off )
						stbtt_setvertex( &vertices[ num_vertices++ ], STBTT_vcurve, x, y, cx, cy );
					else
						stbtt_setvertex( &vertices[ num_vertices++ ], STBTT_vline, x, y, 0, 0 );
					was_off = 0;
				}
			}
		}
		// close the last contour
		num_vertices = stbtt__close_shape( vertices, num_vertices, was_off, start_off, sx, sy, scx, scy, cx, cy );
	}
	else if( numberOfContours == -1 )
	{
		// Compound shapes.
		int more = 1;
		// component records start 10 bytes into the glyph entry
		stbtt_uint8* comp = data + g + 10;
		num_vertices = 0;
		vertices = 0;
		while( more )
		{
			stbtt_uint16 flags, gidx;
			int comp_num_verts = 0, i;
			stbtt_vertex *comp_verts = 0, *tmp = 0;
			// mtx holds a 2x2 matrix in [0..3] and a translation in [4],[5]; starts as identity
			float mtx[6] = { 1,0,0,1,0,0 }, m, n;

			// each component begins with a flags word and the index of the glyph it refers to
			flags = ttSHORT( comp );
			comp += 2;
			gidx = ttSHORT( comp );
			comp += 2;

			if( flags & 2 )
			{ // XY values
				if( flags & 1 )
				{ // shorts
					mtx[ 4 ] = ttSHORT( comp );
					comp += 2;
					mtx[ 5 ] = ttSHORT( comp );
					comp += 2;
				}
				else
				{
					// signed bytes
					mtx[ 4 ] = ttCHAR(comp);
					comp += 1;
					mtx[ 5 ] = ttCHAR(comp);
					comp += 1;
				}
			}
			else
			{
				// @TODO handle matching point
				STBTT_assert(0);
			}
			// scale values are stored as 16-bit numbers and divided by 16384 here
			if( flags & ( 1 << 3 ) )
			{ // WE_HAVE_A_SCALE
				mtx[ 0 ] = mtx[ 3 ] = ttSHORT( comp ) / 16384.0f;
				comp += 2;
				mtx[ 1 ] = mtx[ 2 ] = 0;
			}
			else if( flags & ( 1 << 6 ) )
			{ // WE_HAVE_AN_X_AND_YSCALE
				mtx[ 0 ] = ttSHORT( comp ) / 16384.0f;
				comp += 2;
				mtx[ 1 ] = mtx[ 2 ] = 0;
				mtx[ 3 ] = ttSHORT( comp ) / 16384.0f;
				comp += 2;
			}
			else if( flags & ( 1 << 7 ) )
			{ // WE_HAVE_A_TWO_BY_TWO
				mtx[ 0 ] = ttSHORT( comp ) / 16384.0f;
				comp += 2;
				mtx[ 1 ] = ttSHORT( comp ) / 16384.0f;
				comp += 2;
				mtx[ 2 ] = ttSHORT( comp ) / 16384.0f;
				comp += 2;
				mtx[ 3 ] = ttSHORT( comp ) / 16384.0f;
				comp += 2;
			}

			// Find transformation scales.
			m = ( float ) STBTT_sqrt(mtx[0]*mtx[0] + mtx[1]*mtx[1]);
			n = ( float ) STBTT_sqrt(mtx[2]*mtx[2] + mtx[3]*mtx[3]);

			// Get indexed glyph.
			comp_num_verts = stbtt_GetGlyphShape( info, gidx, &comp_verts );
			if( comp_num_verts > 0 )
			{
				// Transform vertices.
				for( i = 0; i < comp_num_verts; ++i )
				{
					stbtt_vertex* v = &comp_verts[ i ];
					stbtt_vertex_type x, y;
					// endpoint
					x = v->x;
					y = v->y;
					v->x = ( stbtt_vertex_type )( m * ( mtx[ 0 ] * x + mtx[ 2 ] * y + mtx[ 4 ] ) );
					v->y = ( stbtt_vertex_type )( n * ( mtx[ 1 ] * x + mtx[ 3 ] * y + mtx[ 5 ] ) );
					// control point
					x = v->cx;
					y = v->cy;
					v->cx = ( stbtt_vertex_type )( m * ( mtx[ 0 ] * x + mtx[ 2 ] * y + mtx[ 4 ] ) );
					v->cy = ( stbtt_vertex_type )( n * ( mtx[ 1 ] * x + mtx[ 3 ] * y + mtx[ 5 ] ) );
				}
				// Append vertices.
				// (a new array sized for old + new vertices replaces the old one)
				tmp = ( stbtt_vertex* )STBTT_malloc((num_vertices+comp_num_verts)*sizeof(stbtt_vertex), info->userdata);
				if( !tmp )
				{
					// out of memory: release everything gathered so far and report no shape
					if( vertices ) STBTT_free(vertices, info->userdata);
					if( comp_verts ) STBTT_free(comp_verts, info->userdata);
					return 0;
				}
				if( num_vertices > 0 ) STBTT_memcpy( tmp, vertices, num_vertices * sizeof(stbtt_vertex) );
				STBTT_memcpy( tmp + num_vertices, comp_verts, comp_num_verts * sizeof(stbtt_vertex) );
				if( vertices ) STBTT_free(vertices, info->userdata);
				vertices = tmp;
				STBTT_free(comp_verts, info->userdata);
				num_vertices += comp_num_verts;
			}
			// More components ?
			more = flags & ( 1 << 5 );
		}
	}
	else if( numberOfContours < 0 )
	{
		// @TODO other compound variations?
		STBTT_assert(0);
	}
	else
	{
		// numberOfCounters == 0, do nothing
	}

	*pvertices = vertices;
	return num_vertices;
}

STBTT_DEF void stbtt_GetGlyphHMetrics( const stbtt_fontinfo* info, int glyph_index, int* advanceWidth, int* leftSideBearing )
{
	stbtt_uint16 numOfLongHorMetrics = ttUSHORT( info->data + info->hhea + 34 );
	if( glyph_index < numOfLongHorMetrics )
	{
		if( advanceWidth )
			*advanceWidth = ttSHORT( info->data + info->hmtx + 4 * glyph_index );
		if( leftSideBearing )
			*leftSideBearing = ttSHORT( info->data + info->hmtx + 4 * glyph_index + 2 );
	}
	else
	{
		if( advanceWidth )
			*advanceWidth = ttSHORT( info->data + info->hmtx + 4 * ( numOfLongHorMetrics - 1 ) );
		if( leftSideBearing )
			*leftSideBearing = ttSHORT( info->data + info->hmtx + 4 * numOfLongHorMetrics + 2 * ( glyph_index - numOfLongHorMetrics ) );
	}
}

// Looks up the kerning adjustment for the glyph pair (glyph1, glyph2) by
// binary search over the 6-byte pair records of the first 'kern' subtable.
// Returns 0 when there is no kern table, it has no subtables, the first
// subtable's coverage word is not 1, or the pair is not listed.
STBTT_DEF int stbtt_GetGlyphKernAdvance( const stbtt_fontinfo* info, int glyph1, int glyph2 )
{
	stbtt_uint8* kern = info->data + info->kern;
	stbtt_uint32 wanted, found;
	int lo, hi, mid;

	// we only look at the first table. it must be 'horizontal' and format 0.
	if( !info->kern )
		return 0;
	if( ttUSHORT( kern + 2 ) < 1 ) // number of tables, need at least 1
		return 0;
	if( ttUSHORT( kern + 8 ) != 1 ) // horizontal flag must be set in format
		return 0;

	// records are keyed by the two glyph indices packed into 32 bits
	wanted = glyph1 << 16 | glyph2;
	for( lo = 0, hi = ttUSHORT( kern + 10 ) - 1; lo <= hi; )
	{
		mid = ( lo + hi ) >> 1;
		found = ttULONG( kern + 18 + ( mid * 6 ) ); // note: unaligned read
		if( wanted == found )
			return ttSHORT( kern + 22 + ( mid * 6 ) );
		if( wanted < found )
			hi = mid - 1;
		else
			lo = mid + 1;
	}
	return 0;
}

// Codepoint flavour of stbtt_GetGlyphKernAdvance().
STBTT_DEF int stbtt_GetCodepointKernAdvance( const stbtt_fontinfo* info, int ch1, int ch2 )
{
	int first, second;

	// if no kerning table, don't waste time looking up both codepoint->glyphs
	if( !info->kern )
		return 0;

	first = stbtt_FindGlyphIndex( info, ch1 );
	second = stbtt_FindGlyphIndex( info, ch2 );
	return stbtt_GetGlyphKernAdvance( info, first, second );
}

// Codepoint flavour of stbtt_GetGlyphHMetrics(): looks the glyph up first.
STBTT_DEF void stbtt_GetCodepointHMetrics( const stbtt_fontinfo* info, int codepoint, int* advanceWidth, int* leftSideBearing )
{
	int glyph = stbtt_FindGlyphIndex( info, codepoint );
	stbtt_GetGlyphHMetrics( info, glyph, advanceWidth, leftSideBearing );
}

STBTT_DEF void stbtt_GetFontVMetrics( const stbtt_fontinfo* info, int* ascent, int* descent, int* lineGap )
{
	if( ascent )
		*ascent = ttSHORT( info->data + info->hhea + 4 );
	if( descent )
		*descent = ttSHORT( info->data + info->hhea + 6 );
	if( lineGap )
		*lineGap = ttSHORT( info->data + info->hhea + 8 );
}

STBTT_DEF void stbtt_GetFontBoundingBox( const stbtt_fontinfo* info, int* x0, int* y0, int* x1, int* y1 )
{
	*x0 = ttSHORT( info->data + info->head + 36 );
	*y0 = ttSHORT( info->data + info->head + 38 );
	*x1 = ttSHORT( info->data + info->head + 40 );
	*y1 = ttSHORT( info->data + info->head + 42 );
}

STBTT_DEF float stbtt_ScaleForPixelHeight( const stbtt_fontinfo* info, float height )
{
	int fheight = ttSHORT( info->data + info->hhea + 4 ) - ttSHORT( info->data + info->hhea + 6 );
	return ( float ) height / fheight;
}

STBTT_DEF float stbtt_ScaleForMappingEmToPixels( const stbtt_fontinfo* info, float pixels )
{
	int unitsPerEm = ttUSHORT( info->data + info->head + 18 );
	return pixels / unitsPerEm;
}

// Releases a vertex array through STBTT_free, passing the font's
// allocation userdata along.
STBTT_DEF void stbtt_FreeShape( const stbtt_fontinfo* info, stbtt_vertex* v )
{
	void* alloc_context = info->userdata;
	STBTT_free(v, alloc_context);
}

//////////////////////////////////////////////////////////////////////////////
//
// antialiasing software rasterizer
//

// Computes the integer pixel box a glyph touches after scaling and shifting.
// The glyph's font-unit box comes from stbtt_GetGlyphBox; y is negated, so
// the top of the pixel box derives from the glyph's y1 and the bottom from y0.
// Mins are rounded down and maxes rounded up. If stbtt_GetGlyphBox reports no
// box (e.g. a space character) all outputs are 0. Every output pointer is
// optional; a NULL pointer is skipped.
STBTT_DEF void stbtt_GetGlyphBitmapBoxSubpixel( const stbtt_fontinfo* font, int glyph, float scale_x, float scale_y, float shift_x, float shift_y, int* ix0, int* iy0, int* ix1, int* iy1 )
{
	int x0 = 0, y0 = 0, x1, y1; // =0 suppresses compiler warning
	int left = 0, top = 0, right = 0, bottom = 0;

	if( stbtt_GetGlyphBox( font, glyph, &x0, &y0, &x1, &y1 ) )
	{
		// treat pixels as little squares and find which ones get touched
		left = STBTT_ifloor( x0 * scale_x + shift_x);
		top = STBTT_ifloor(-y1 * scale_y + shift_y);
		right = STBTT_iceil ( x1 * scale_x + shift_x);
		bottom = STBTT_iceil (-y0 * scale_y + shift_y);
	}

	if( ix0 )
		*ix0 = left;
	if( iy0 )
		*iy0 = top;
	if( ix1 )
		*ix1 = right;
	if( iy1 )
		*iy1 = bottom;
}

// Same as stbtt_GetGlyphBitmapBoxSubpixel with both subpixel shifts fixed
// at 0.
STBTT_DEF void stbtt_GetGlyphBitmapBox( const stbtt_fontinfo* font, int glyph, float scale_x, float scale_y, int* ix0, int* iy0, int* ix1, int* iy1 )
{
	stbtt_GetGlyphBitmapBoxSubpixel( font, glyph, scale_x, scale_y, 0.0f, 0.0f, ix0, iy0, ix1, iy1 );
}

// Codepoint flavour of stbtt_GetGlyphBitmapBoxSubpixel: looks up the glyph
// index for `codepoint` and forwards every other argument unchanged.
STBTT_DEF void stbtt_GetCodepointBitmapBoxSubpixel( const stbtt_fontinfo* font, int codepoint, float scale_x, float scale_y, float shift_x, float shift_y, int* ix0, int* iy0, int* ix1, int* iy1 )
{
	int glyph_index = stbtt_FindGlyphIndex( font, codepoint );
	stbtt_GetGlyphBitmapBoxSubpixel( font, glyph_index, scale_x, scale_y, shift_x, shift_y, ix0, iy0, ix1, iy1 );
}

// Same as stbtt_GetCodepointBitmapBoxSubpixel with both subpixel shifts
// fixed at 0.
STBTT_DEF void stbtt_GetCodepointBitmapBox( const stbtt_fontinfo* font, int codepoint, float scale_x, float scale_y, int* ix0, int* iy0, int* ix1, int* iy1 )
{
	stbtt_GetCodepointBitmapBoxSubpixel( font, codepoint, scale_x, scale_y, 0.0f, 0.0f, ix0, iy0, ix1, iy1 );
}

//////////////////////////////////////////////////////////////////////////////
//
//  Rasterizer

// Header placed at the start of every block stbtt__hheap_alloc obtains from
// STBTT_malloc; chunks form a singly linked list that stbtt__hheap_cleanup
// walks to free them.
typedef struct stbtt__hheap_chunk
{
	struct stbtt__hheap_chunk* next; // previously allocated chunk, or NULL
} stbtt__hheap_chunk;

// Small allocator state for same-sized items, used by the rasterizer for
// active edges. Items are carved out of chunks and recycled via a free list.
typedef struct stbtt__hheap
{
	struct stbtt__hheap_chunk* head; // most recently allocated chunk
	void* first_free;                // free list of recycled items; each freed item's first word links to the next
	int num_remaining_in_head_chunk; // items of the head chunk not yet handed out
} stbtt__hheap;

// Hands out one item of `size` bytes from heap `hh`.
//
// A recycled item from the free list is preferred. Otherwise the item is
// carved from the head chunk, and a new chunk is obtained through
// STBTT_malloc (with `userdata`) when the head chunk is exhausted. The number
// of items per chunk depends on `size` (2000 / 800 / 100).
//
// Returns NULL only when a new chunk is needed and the allocation fails.
static void* stbtt__hheap_alloc( stbtt__hheap* hh, size_t size, void* userdata )
{
	if( hh->first_free )
	{
		// pop the free list: the first word of a freed item links to the next one
		void* p = hh->first_free;
		hh->first_free = * ( void ** ) p;
		return p;
	}
	else
	{
		if( hh->num_remaining_in_head_chunk == 0 )
		{
			int count = ( size < 32 ? 2000 : size < 128 ? 800 : 100 );
			stbtt__hheap_chunk* c = ( stbtt__hheap_chunk * ) STBTT_malloc(sizeof(stbtt__hheap_chunk) + size * count, userdata);
			if( c == NULL )
				return NULL;
			c->next = hh->head;
			hh->head = c;
			hh->num_remaining_in_head_chunk = count;
		}
		--hh->num_remaining_in_head_chunk;
		// Items live after the chunk header. Without skipping the header, the
		// last item handed out from a chunk (index 0) would alias c->next and
		// writing to it would corrupt the chunk list used by cleanup.
		return ( char * ) ( hh->head ) + sizeof( stbtt__hheap_chunk ) + size * hh->num_remaining_in_head_chunk;
	}
}

// Returns item `p` to the heap by pushing it on the free list. The item's
// first pointer-sized word is overwritten with the link to the previous
// list head; no memory is released to the system here.
static void stbtt__hheap_free( stbtt__hheap* hh, void* p )
{
	void** link = ( void ** ) p;

	*link = hh->first_free;
	hh->first_free = p;
}

// Frees every chunk owned by `hh` through STBTT_free (with `userdata`).
// The fields of `hh` itself are left untouched.
static void stbtt__hheap_cleanup( stbtt__hheap* hh, void* userdata )
{
	stbtt__hheap_chunk* chunk;
	stbtt__hheap_chunk* following;

	for( chunk = hh->head; chunk != NULL; chunk = following )
	{
		// grab the link before the chunk's memory goes away
		following = chunk->next;
		STBTT_free(chunk, userdata);
	}
}

// One straight edge of a flattened contour, in scaled bitmap space.
// stbtt__rasterize orients each edge before sorting by y0, and records in
// `invert` whether it swapped the endpoints to do so.
typedef struct stbtt__edge
{
	float x0, y0, x1, y1; // endpoints; edges are sorted on y0
	int invert;           // 1 if endpoints were swapped; selects the winding direction of the active edge
} stbtt__edge;

// An edge currently intersecting the scanline being rasterized. Instances are
// allocated from an stbtt__hheap by stbtt__new_active. The layout depends on
// STBTT_RASTERIZER_VERSION.
typedef struct stbtt__active_edge
{
	struct stbtt__active_edge* next; // next edge in the active list
#if STBTT_RASTERIZER_VERSION==1
   int x,dx;      // current x and per-scanline x step, in STBTT_FIX fixed point
   float ey;      // y at which the edge ends (e->y1)
   int direction; // +1 or -1 winding contribution; set to 0 when the edge is retired
#elif STBTT_RASTERIZER_VERSION==2
	float fx, fdx, fdy; // x at the top of the current scanline, dx/dy, and dy/dx (0 when dx/dy is 0)
	float direction;    // +1.0 or -1.0 winding contribution; set to 0 when the edge is retired
	float sy;           // y at which the edge starts (e->y0)
	float ey;           // y at which the edge ends (e->y1)
#else
   #error "Unrecognized value of STBTT_RASTERIZER_VERSION"
#endif
} stbtt__active_edge;

#if STBTT_RASTERIZER_VERSION == 1
// Fixed-point format used by the version-1 rasterizer for x coordinates:
// STBTT_FIXSHIFT fractional bits, STBTT_FIX is 1.0, STBTT_FIXMASK extracts
// the fractional part.
#define STBTT_FIXSHIFT   10
#define STBTT_FIX        (1 << STBTT_FIXSHIFT)
#define STBTT_FIXMASK    (STBTT_FIX-1)

// Creates the active-edge record for edge `e` (version-1 rasterizer).
//   hh          - heap the record is allocated from
//   e           - source edge; e->y0 != e->y1 is required (division by y1-y0)
//   off_x       - x offset subtracted so x becomes relative to the bitmap
//   start_point - y at which the edge's x position is evaluated
//   userdata    - forwarded to the allocator
// Returns NULL (after asserting) if the allocation fails.
static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, float start_point, void *userdata)
{
   stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata);
   float dxdy = (e->x1 - e->x0) / (e->y1 - e->y0);
   STBTT_assert(z != NULL);
   if (!z) return z;
   
// round dx down to avoid overshooting
   if (dxdy < 0)
      z->dx = -STBTT_ifloor(STBTT_FIX * -dxdy);
   else
      z->dx = STBTT_ifloor(STBTT_FIX * dxdy);

   z->x = STBTT_ifloor(STBTT_FIX * e->x0 + z->dx * (start_point - e->y0)); // use z->dx so when we offset later it's by the same amount
   z->x -= off_x * STBTT_FIX;

   z->ey = e->y1;
   z->next = 0;
   // inverted edges count +1 towards the winding number, others -1
   z->direction = e->invert ? 1 : -1;
   return z;
}
#elif STBTT_RASTERIZER_VERSION == 2
// Creates the active-edge record for edge `e` (version-2 rasterizer).
//   hh          - heap the record is allocated from
//   e           - source edge; callers must ensure e->y0 != e->y1
//   off_x       - x offset subtracted so fx becomes relative to the bitmap
//   start_point - y at which the edge's x position is evaluated
//   userdata    - forwarded to the allocator
// Returns NULL (after asserting) if the allocation fails.
static stbtt__active_edge* stbtt__new_active( stbtt__hheap* hh, stbtt__edge* e, int off_x, float start_point, void* userdata )
{
	float slope;
	stbtt__active_edge* z = ( stbtt__active_edge * ) stbtt__hheap_alloc( hh, sizeof( *z ), userdata );

	STBTT_assert(z != NULL);
	if( z == NULL )
		return z;

	// x change per unit of y along the edge
	slope = ( e->x1 - e->x0 ) / ( e->y1 - e->y0 );

	z->next = 0;
	z->sy = e->y0;
	z->ey = e->y1;
	z->direction = e->invert ? 1.0f : -1.0f;

	z->fdx = slope;
	if( slope != 0.0f )
		z->fdy = 1.0f / slope;
	else
		z->fdy = 0.0f; // vertical edge: no meaningful dy/dx

	// x where the (extended) edge crosses y == start_point, in bitmap space
	z->fx = e->x0 + slope * ( start_point - e->y0 );
	z->fx -= off_x;
	return z;
}
#else
#error "Unrecognized value of STBTT_RASTERIZER_VERSION"
#endif

#if STBTT_RASTERIZER_VERSION == 1
// Accumulates coverage for one sub-scanline into `scanline` (version-1
// rasterizer), walking the x-sorted active edge list `e` and filling spans
// using the non-zero winding rule.
//   scanline   - `len` coverage bytes for the current output row
//   max_weight - coverage added to a fully covered pixel by one sub-scanline
//
// note: this routine clips fills that extend off the edges... ideally this
// wouldn't happen, but it could happen if the truetype glyph bounding boxes
// are wrong, or if the user supplies a too-small bitmap
static void stbtt__fill_active_edges(unsigned char *scanline, int len, stbtt__active_edge *e, int max_weight)
{
// non-zero winding fill
   int x0=0, w=0; // x0: fixed-point start of the current span; w: running winding number

   while (e) {
      if (w == 0) {
// if we're currently at zero, we need to record the edge start point
         x0 = e->x; w += e->direction;
      } else {
         int x1 = e->x; w += e->direction;
// if we went to zero, we need to draw
         if (w == 0) {
            // integer pixel columns containing the span's start and end
            int i = x0 >> STBTT_FIXSHIFT;
            int j = x1 >> STBTT_FIXSHIFT;

            if (i < len && j >= 0) {
               if (i == j) {
// x0,x1 are the same pixel, so compute combined coverage
                  scanline[i] = scanline[i] + (stbtt_uint8) ((x1 - x0) * max_weight >> STBTT_FIXSHIFT);
               } else {
                  if (i >= 0) // add antialiasing for x0
                     scanline[i] = scanline[i] + (stbtt_uint8) (((STBTT_FIX - (x0 & STBTT_FIXMASK)) * max_weight) >> STBTT_FIXSHIFT);
                  else
                     i = -1; // clip

                  if (j < len) // add antialiasing for x1
                     scanline[j] = scanline[j] + (stbtt_uint8) (((x1 & STBTT_FIXMASK) * max_weight) >> STBTT_FIXSHIFT);
                  else
                     j = len; // clip

                  for (++i; i < j; ++i) // fill pixels between x0 and x1
                     scanline[i] = scanline[i] + (stbtt_uint8) max_weight;
               }
            }
         }
      }
      
      e = e->next;
   }
}

// Scan-converts the y-sorted edge list `e` into `result` (version-1
// rasterizer, vertical supersampling).
//   result     - destination bitmap (w, h, stride, pixels)
//   e          - edges sorted by y0; e[n] must be writable, it is used as a
//                sentinel that starts below the last scanline
//   n          - number of real edges
//   vsubsample - sub-scanlines per output row; edge y values are expected to
//                be pre-multiplied by it
//   off_x/off_y- bitmap-space offset of the bitmap's top-left corner
//   userdata   - forwarded to the allocator
// If the temporary scanline buffer cannot be allocated the function returns
// without touching the bitmap.
static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata)
{
   stbtt__hheap hh = { 0, 0, 0 };
   stbtt__active_edge *active = NULL;
   int y,j=0;
   int max_weight = (255 / vsubsample);  // weight per vertical scanline
   int s; // vertical subsample index
   unsigned char scanline_data[512], *scanline;

   if (result->w > 512) {
      scanline = (unsigned char *) STBTT_malloc(result->w, userdata);
      if (scanline == NULL)
         return; // out of memory: nothing allocated yet, so just bail out instead of crashing
   } else
      scanline = scanline_data;

   y = off_y * vsubsample;
   // sentinel edge: starts past the last sub-scanline so the insertion loop always stops
   e[n].y0 = (off_y + result->h) * (float) vsubsample + 1;

   while (j < result->h) {
      STBTT_memset(scanline, 0, result->w);
      for (s=0; s < vsubsample; ++s) {
         // find center of pixel for this scanline
         float scan_y = y + 0.5f;
         stbtt__active_edge **step = &active;

         // update all active edges;
         // remove all active edges that terminate before the center of this scanline
         while (*step) {
            stbtt__active_edge * z = *step;
            if (z->ey <= scan_y) {
               *step = z->next; // delete from list
               STBTT_assert(z->direction);
               z->direction = 0;
               stbtt__hheap_free(&hh, z);
            } else {
               z->x += z->dx; // advance to position for current scanline
               step = &((*step)->next); // advance through list
            }
         }

         // resort the list if needed (bubble sort passes until nothing moves)
         for(;;) {
            int changed=0;
            step = &active;
            while (*step && (*step)->next) {
               if ((*step)->x > (*step)->next->x) {
                  stbtt__active_edge *t = *step;
                  stbtt__active_edge *q = t->next;

                  t->next = q->next;
                  q->next = t;
                  *step = q;
                  changed = 1;
               }
               step = &(*step)->next;
            }
            if (!changed) break;
         }

         // insert all edges that start before the center of this scanline -- omit ones that also end on this scanline
         while (e->y0 <= scan_y) {
            if (e->y1 > scan_y) {
               stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y, userdata);
               if (z != NULL) {
                  // find insertion point
                  if (active == NULL)
                     active = z;
                  else if (z->x < active->x) {
                     // insert at front
                     z->next = active;
                     active = z;
                  } else {
                     // find thing to insert AFTER
                     stbtt__active_edge *p = active;
                     while (p->next && p->next->x < z->x)
                        p = p->next;
                     // at this point, p->next->x is NOT < z->x
                     z->next = p->next;
                     p->next = z;
                  }
               }
            }
            ++e;
         }

         // now accumulate coverage from all active edges
         if (active)
            stbtt__fill_active_edges(scanline, result->w, active, max_weight);

         ++y;
      }
      STBTT_memcpy(result->pixels + j * result->stride, scanline, result->w);
      ++j;
   }

   stbtt__hheap_cleanup(&hh, userdata);

   if (scanline != scanline_data)
      STBTT_free(scanline, userdata);
}

#elif STBTT_RASTERIZER_VERSION == 2

// Adds the coverage contribution of the segment (x0,y0)-(x1,y1) of active
// edge `e` to scanline[x].
//
// the edge passed in here does not cross the vertical line at x or the vertical line at x+1
// (i.e. it has already been clipped to those)
//
// The segment is first clipped vertically to the edge's own extent
// [e->sy, e->ey]. A segment entirely left of the pixel contributes its full
// height, one entirely right of it contributes nothing, and one inside the
// pixel contributes its height weighted by (1 - average x position).
static void stbtt__handle_clipped_edge( float* scanline, int x, stbtt__active_edge* e, float x0, float y0, float x1, float y1 )
{
	if( y0 == y1 )
		return;
	STBTT_assert(y0 < y1);
	STBTT_assert(e->sy <= e->ey);
	// segment lies completely outside the edge's vertical extent
	if( y0 > e->ey )
		return;
	if( y1 < e->sy )
		return;
	// clip the top of the segment to where the edge actually starts
	if( y0 < e->sy )
	{
		x0 += ( x1 - x0 ) * ( e->sy - y0 ) / ( y1 - y0 );
		y0 = e->sy;
	}
	// clip the bottom of the segment to where the edge actually ends
	if( y1 > e->ey )
	{
		x1 += ( x1 - x0 ) * ( e->ey - y1 ) / ( y1 - y0 );
		y1 = e->ey;
	}

	// sanity checks: both endpoints must be on the same side of (or inside) pixel column x
	if( x0 == x )
	STBTT_assert(x1 <= x+1);
	else if( x0 == x + 1 )
	STBTT_assert(x1 >= x);
	else if( x0 <= x )
	STBTT_assert(x1 <= x);
	else if( x0 >= x + 1 )
	STBTT_assert(x1 >= x+1);
	else
	STBTT_assert(x1 >= x && x1 <= x+1);

	if( x0 <= x && x1 <= x )
		scanline[ x ] += e->direction * ( y1 - y0 ); // entirely to the left: full coverage for this height
	else if( x0 >= x + 1 && x1 >= x + 1 ); // entirely to the right: contributes nothing (intentionally empty)
	else
	{
		STBTT_assert(x0 >= x && x0 <= x+1 && x1 >= x && x1 <= x+1);
		scanline[ x ] += e->direction * ( y1 - y0 ) * ( 1 - ( ( x0 - x ) + ( x1 - x ) ) / 2 ); // coverage = 1 - average x position
	}
}

// Accumulates signed area coverage of every active edge for the scanline
// spanning y_top..y_top+1 (version-2 rasterizer).
//   scanline      - per-pixel partial coverage for the pixels an edge passes through
//   scanline_fill - per-pixel "fill" deltas; the caller passes its fill buffer
//                   offset by one element and later adds a running sum of that
//                   buffer to `scanline`, so a value stored at scanline_fill[x]
//                   affects the pixels to the right of x
//   len           - width of the scanline in pixels
//   e             - head of the active edge list
//   y_top         - y coordinate of the top of this scanline
static void stbtt__fill_active_edges_new( float* scanline, float* scanline_fill, int len, stbtt__active_edge* e, float y_top )
{
	float y_bottom = y_top + 1;

	while( e )
	{
		// brute force every pixel

		// compute intersection points with top & bottom
		STBTT_assert(e->ey >= y_top);

		if( e->fdx == 0 )
		{
			// vertical edge: x does not change over the scanline
			float x0 = e->fx;
			if( x0 < len )
			{
				if( x0 >= 0 )
				{
					// partial coverage in the pixel containing x0, plus fill for everything to its right
					// (scanline_fill - 1 indexed at (int)x0 + 1 addresses scanline_fill[(int)x0])
					stbtt__handle_clipped_edge( scanline, ( int ) x0, e, x0, y_top, x0, y_bottom );
					stbtt__handle_clipped_edge( scanline_fill - 1, ( int ) x0 + 1, e, x0, y_top, x0, y_bottom );
				}
				else
				{
					// edge is left of the bitmap: accumulate into scanline_fill[-1]
					stbtt__handle_clipped_edge( scanline_fill - 1, 0, e, x0, y_top, x0, y_bottom );
				}
			}
		}
		else
		{
			float x0 = e->fx;      // x of the (extended) edge at y_top
			float dx = e->fdx;
			float xb = x0 + dx;    // x of the (extended) edge at y_bottom
			float x_top, x_bottom; // x at the clipped top/bottom of the edge within this scanline
			float sy0, sy1;        // clipped top/bottom y of the edge within this scanline
			float dy = e->fdy;
			STBTT_assert(e->sy <= y_bottom && e->ey >= y_top);

			// compute endpoints of line segment clipped to this scanline (if the
			// line segment starts on this scanline. x0 is the intersection of the
			// line with y_top, but that may be off the line segment.
			if( e->sy > y_top )
			{
				x_top = x0 + dx * ( e->sy - y_top );
				sy0 = e->sy;
			}
			else
			{
				x_top = x0;
				sy0 = y_top;
			}
			if( e->ey < y_bottom )
			{
				x_bottom = x0 + dx * ( e->ey - y_top );
				sy1 = e->ey;
			}
			else
			{
				x_bottom = xb;
				sy1 = y_bottom;
			}

			if( x_top >= 0 && x_bottom >= 0 && x_top < len && x_bottom < len )
			{
				// from here on, we don't have to range check x values

				if( ( int ) x_top == ( int ) x_bottom )
				{
					float height;
					// simple case, only spans one pixel
					int x = ( int ) x_top;
					height = sy1 - sy0;
					STBTT_assert(x >= 0 && x < len);
					scanline[ x ] += e->direction * ( 1 - ( ( x_top - x ) + ( x_bottom - x ) ) / 2 ) * height;
					scanline_fill[ x ] += e->direction * height; // everything right of this pixel is filled
				}
				else
				{
					int x, x1, x2;
					float y_crossing, step, sign, area;
					// covers 2+ pixels
					if( x_top > x_bottom )
					{
						// flip scanline vertically; signed area is the same
						float t;
						sy0 = y_bottom - ( sy0 - y_top );
						sy1 = y_bottom - ( sy1 - y_top );
						t = sy0 , sy0 = sy1 , sy1 = t;
						t = x_bottom , x_bottom = x_top , x_top = t;
						dx = -dx;
						dy = -dy;
						t = x0 , x0 = xb , xb = t;
					}

					// after the optional flip the edge runs left-to-right: x1 is the first pixel, x2 the last
					x1 = ( int ) x_top;
					x2 = ( int ) x_bottom;
					// compute intersection with y axis at x1+1
					y_crossing = ( x1 + 1 - x0 ) * dy + y_top;

					sign = e->direction;
					// area of the rectangle covered from y0..y_crossing
					area = sign * ( y_crossing - sy0 );
					// area of the triangle (x_top,y0), (x+1,y0), (x+1,y_crossing)
					scanline[ x1 ] += area * ( 1 - ( ( x_top - x1 ) + ( x1 + 1 - x1 ) ) / 2 );

					// each fully crossed interior pixel gains the area so far plus half of this pixel's step
					step = sign * dy;
					for( x = x1 + 1; x < x2; ++x )
					{
						scanline[ x ] += area + step / 2;
						area += step;
					}
					y_crossing += dy * ( x2 - ( x1 + 1 ) );

					STBTT_assert(STBTT_fabs(area) <= 1.01f);

					// last pixel: area accumulated so far plus the remaining partial trapezoid
					scanline[ x2 ] += area + sign * ( 1 - ( ( x2 - x2 ) + ( x_bottom - x2 ) ) / 2 ) * ( sy1 - y_crossing );

					// everything right of the last pixel is covered for the edge's full clipped height
					scanline_fill[ x2 ] += sign * ( sy1 - sy0 );
				}
			}
			else
			{
				// if edge goes outside of box we're drawing, we require
				// clipping logic. since this does not match the intended use
				// of this library, we use a different, very slow brute
				// force implementation
				int x;
				for( x = 0; x < len; ++x )
				{
					// cases:
					//
					// there can be up to two intersections with the pixel. any intersection
					// with left or right edges can be handled by splitting into two (or three)
					// regions. intersections with top & bottom do not necessitate case-wise logic.
					//
					// the old way of doing this found the intersections with the left & right edges,
					// then used some simple logic to produce up to three segments in sorted order
					// from top-to-bottom. however, this had a problem: if an x edge was epsilon
					// across the x border, then the corresponding y position might not be distinct
					// from the other y segment, and it might be ignored as an empty segment. to avoid
					// that, we need to explicitly produce segments based on x positions.

					// rename variables to clear pairs:
					// (x0,y0) edge at top of scanline, (x1,y1) crossing of pixel's left side,
					// (x2,y2) crossing of pixel's right side, (x3,y3) edge at bottom of scanline
					float y0 = y_top;
					float x1 = ( float ) ( x );
					float x2 = ( float ) ( x + 1 );
					float x3 = xb;
					float y3 = y_bottom;
					float y1, y2;

					// x = e->x + e->dx * (y-y_top)
					// (y-y_top) = (x - e->x) / e->dx
					// y = (x - e->x) / e->dx + y_top
					y1 = ( x - x0 ) / dx + y_top;
					y2 = ( x + 1 - x0 ) / dx + y_top;

					if( x0 < x1 && x3 > x2 )
					{ // three segments descending down-right
						stbtt__handle_clipped_edge( scanline, x, e, x0, y0, x1, y1 );
						stbtt__handle_clipped_edge( scanline, x, e, x1, y1, x2, y2 );
						stbtt__handle_clipped_edge( scanline, x, e, x2, y2, x3, y3 );
					}
					else if( x3 < x1 && x0 > x2 )
					{ // three segments descending down-left
						stbtt__handle_clipped_edge( scanline, x, e, x0, y0, x2, y2 );
						stbtt__handle_clipped_edge( scanline, x, e, x2, y2, x1, y1 );
						stbtt__handle_clipped_edge( scanline, x, e, x1, y1, x3, y3 );
					}
					else if( x0 < x1 && x3 > x1 )
					{ // two segments across x, down-right
						stbtt__handle_clipped_edge( scanline, x, e, x0, y0, x1, y1 );
						stbtt__handle_clipped_edge( scanline, x, e, x1, y1, x3, y3 );
					}
					else if( x3 < x1 && x0 > x1 )
					{ // two segments across x, down-left
						stbtt__handle_clipped_edge( scanline, x, e, x0, y0, x1, y1 );
						stbtt__handle_clipped_edge( scanline, x, e, x1, y1, x3, y3 );
					}
					else if( x0 < x2 && x3 > x2 )
					{ // two segments across x+1, down-right
						stbtt__handle_clipped_edge( scanline, x, e, x0, y0, x2, y2 );
						stbtt__handle_clipped_edge( scanline, x, e, x2, y2, x3, y3 );
					}
					else if( x3 < x2 && x0 > x2 )
					{ // two segments across x+1, down-left
						stbtt__handle_clipped_edge( scanline, x, e, x0, y0, x2, y2 );
						stbtt__handle_clipped_edge( scanline, x, e, x2, y2, x3, y3 );
					}
					else
					{ // one segment
						stbtt__handle_clipped_edge( scanline, x, e, x0, y0, x3, y3 );
					}
				}
			}
		}
		e = e->next;
	}
}

// directly AA rasterize edges w/o supersampling (version-2 rasterizer)
//   result     - destination bitmap (w, h, stride, pixels)
//   e          - edges sorted by y0; e[n] must be writable, it is used as a
//                sentinel that starts below the last scanline
//   n          - number of real edges
//   vsubsample - unused by this rasterizer version
//   off_x/off_y- bitmap-space offset of the bitmap's top-left corner
//   userdata   - forwarded to the allocator
// If the temporary scanline buffers cannot be allocated the function returns
// without touching the bitmap.
static void stbtt__rasterize_sorted_edges( stbtt__bitmap* result, stbtt__edge* e, int n, int vsubsample, int off_x, int off_y, void* userdata )
{
	stbtt__hheap hh = { 0, 0, 0 };
	stbtt__active_edge* active = NULL;
	int y, j = 0, i;
	float scanline_data[129], *scanline, *scanline2;

	// kept after the declarations so the function has no mixed declarations and code
	( void )vsubsample;

	// two buffers share one allocation: w coverage values followed by w+1 fill values
	if( result->w > 64 )
	{
		scanline = ( float * ) STBTT_malloc((result->w*2+1) * sizeof(float), userdata);
		if( scanline == NULL )
			return; // out of memory: nothing allocated yet, so just bail out instead of crashing
	}
	else
		scanline = scanline_data;

	scanline2 = scanline + result->w;

	y = off_y;
	// sentinel edge: starts past the last scanline so the insertion loop always stops
	e[ n ].y0 = ( float ) ( off_y + result->h ) + 1;

	while( j < result->h )
	{
		// top and bottom y of this scanline
		float scan_y_top = y + 0.0f;
		float scan_y_bottom = y + 1.0f;
		stbtt__active_edge** step = &active;

		STBTT_memset( scanline, 0, result->w * sizeof( scanline[ 0 ] ) );
		STBTT_memset( scanline2, 0, ( result->w + 1 ) * sizeof( scanline[ 0 ] ) );

		// update all active edges;
		// remove all active edges that terminate before the top of this scanline
		while( *step )
		{
			stbtt__active_edge* z = *step;
			if( z->ey <= scan_y_top )
			{
				*step = z->next; // delete from list
				STBTT_assert(z->direction);
				z->direction = 0;
				stbtt__hheap_free( &hh, z );
			}
			else
			{
				step = &( ( *step )->next ); // advance through list
			}
		}

		// insert all edges that start before the bottom of this scanline
		while( e->y0 <= scan_y_bottom )
		{
			if( e->y0 != e->y1 )
			{
				stbtt__active_edge* z = stbtt__new_active( &hh, e, off_x, scan_y_top, userdata );
				if( z != NULL )
				{
					STBTT_assert(z->ey >= scan_y_top);
					// insert at front
					z->next = active;
					active = z;
				}
			}
			++e;
		}

		// now process all active edges
		if( active )
			stbtt__fill_active_edges_new( scanline, scanline2 + 1, result->w, active, scan_y_top );

		{
			// final coverage = partial coverage + running sum of fill deltas, converted to 0..255
			float sum = 0;
			for( i = 0; i < result->w; ++i )
			{
				float k;
				int m;
				sum += scanline2[ i ];
				k = scanline[ i ] + sum;
				k = ( float ) STBTT_fabs(k) * 255 + 0.5f;
				m = ( int ) k;
				if( m > 255 )
					m = 255;
				result->pixels[ j * result->stride + i ] = ( unsigned char ) m;
			}
		}
		// advance all the edges
		step = &active;
		while( *step )
		{
			stbtt__active_edge* z = *step;
			z->fx += z->fdx; // advance to position for next scanline
			step = &( ( *step )->next ); // advance through list
		}

		++y;
		++j;
	}

	stbtt__hheap_cleanup( &hh, userdata );

	if( scanline != scanline_data )
		STBTT_free(scanline, userdata);
}
#else
#error "Unrecognized value of STBTT_RASTERIZER_VERSION"
#endif

// Ordering predicate for edges: true when edge *a starts strictly above edge *b.
#define STBTT__COMPARE(a,b)  ((a)->y0 < (b)->y0)

// Insertion sort of p[0..n-1] by ascending y0. Elements that compare equal
// keep their relative order.
static void stbtt__sort_edges_ins_sort( stbtt__edge* p, int n )
{
	int i;
	for( i = 1; i < n; ++i )
	{
		stbtt__edge key = p[ i ];
		int hole = i;

		// shift larger elements one slot right until key's position is found
		while( hole > 0 && STBTT__COMPARE(&key, &p[ hole - 1 ]) )
		{
			p[ hole ] = p[ hole - 1 ];
			--hole;
		}

		// only write back when the element actually moved
		if( hole != i )
			p[ hole ] = key;
	}
}

// Partial quicksort of p[0..n-1] on y0 using median-of-three pivots.
// Partitions of 12 or fewer elements are left unsorted; the caller is
// expected to finish with an insertion sort pass (see stbtt__sort_edges).
// Recurses on the smaller partition and loops on the larger one.
static void stbtt__sort_edges_quicksort( stbtt__edge* p, int n )
{
	/* threshold for transitioning to insertion sort */
	while( n > 12 )
	{
		stbtt__edge t;
		int c01, c12, c, m, i, j;

		/* compute median of three */
		m = n >> 1;
		c01 = STBTT__COMPARE(&p[0],&p[m]);
		c12 = STBTT__COMPARE(&p[m],&p[n-1]);
		/* if 0 >= mid >= end, or 0 < mid < end, then use mid */
		if( c01 != c12 )
		{
			/* otherwise, we'll need to swap something else to middle */
			int z;
			c = STBTT__COMPARE(&p[0],&p[n-1]);
			/* 0>mid && mid<n:  0>n => n; 0<n => 0 */
			/* 0<mid && mid>n:  0>n => 0; 0<n => n */
			z = ( c == c12 ) ? 0 : n - 1;
			t = p[ z ];
			p[ z ] = p[ m ];
			p[ m ] = t;
		}
		/* now p[m] is the median-of-three */
		/* swap it to the beginning so it won't move around */
		t = p[ 0 ];
		p[ 0 ] = p[ m ];
		p[ m ] = t;

		/* partition loop */
		i = 1;
		j = n - 1;
		for( ;; )
		{
			/* handling of equality is crucial here */
			/* for sentinels & efficiency with duplicates */
			/* advance i past elements strictly less than the pivot p[0] */
			for( ;; ++i )
			{
				if( !STBTT__COMPARE(&p[i], &p[0]) )
					break;
			}
			/* retreat j past elements strictly greater than the pivot p[0] */
			for( ;; --j )
			{
				if( !STBTT__COMPARE(&p[0], &p[j]) )
					break;
			}
			/* make sure we haven't crossed */
			if( i >= j )
				break;
			t = p[ i ];
			p[ i ] = p[ j ];
			p[ j ] = t;

			++i;
			--j;
		}
		/* recurse on smaller side, iterate on larger */
		if( j < ( n - i ) )
		{
			stbtt__sort_edges_quicksort( p, j );
			p = p + i;
			n = n - i;
		}
		else
		{
			stbtt__sort_edges_quicksort( p + i, n - i );
			n = j;
		}
	}
}

// Sorts p[0..n-1] by ascending y0: a quicksort pass that leaves small
// partitions unsorted, followed by an insertion sort over the whole array to
// finish the job.
static void stbtt__sort_edges( stbtt__edge* p, int n )
{
	stbtt__sort_edges_quicksort( p, n );
	stbtt__sort_edges_ins_sort( p, n );
}

// A 2D point of a flattened contour, as produced by stbtt_FlattenCurves and
// consumed by stbtt__rasterize.
typedef struct
{
	float x, y;
} stbtt__point;

// Turns flattened contours into an edge list and scan-converts it.
//   result          - destination bitmap
//   pts             - all contour points, contour after contour
//   wcount          - number of points in each contour
//   windings        - number of contours
//   scale_*/shift_* - transform applied to every point
//   off_x, off_y    - forwarded to the scanline rasterizer
//   invert          - non-zero flips the y axis
//   userdata        - forwarded to the allocator
// Returns silently if the edge array cannot be allocated.
static void stbtt__rasterize( stbtt__bitmap* result, stbtt__point* pts, int* wcount, int windings, float scale_x, float scale_y, float shift_x, float shift_y, int off_x, int off_y, int invert, void* userdata )
{
	float y_scale_inv = invert ? -scale_y : scale_y;
	stbtt__edge* edges;
	int total_points, num_edges, w, first;
#if STBTT_RASTERIZER_VERSION == 1
	int vsubsample = result->h < 8 ? 15 : 5;
#elif STBTT_RASTERIZER_VERSION == 2
	int vsubsample = 1;
#else
	#error "Unrecognized value of STBTT_RASTERIZER_VERSION"
#endif
	// vsubsample should divide 255 evenly; otherwise we won't reach full opacity

	// a closed contour with k points has at most k edges
	total_points = 0;
	for( w = 0; w < windings; ++w )
		total_points += wcount[ w ];

	// one extra slot: the scanline rasterizer writes a sentinel at edges[num_edges]
	edges = ( stbtt__edge * ) STBTT_malloc(sizeof(*edges) * (total_points+1), userdata);
	if( edges == 0 )
		return;

	num_edges = 0;
	first = 0;
	for( w = 0; w < windings; ++w )
	{
		stbtt__point* contour = pts + first;
		int count = wcount[ w ];
		int cur, prev;

		first += count;
		// walk consecutive point pairs, starting with the closing edge (last -> first)
		for( cur = 0, prev = count - 1; cur < count; prev = cur++ )
		{
			stbtt__edge* out;
			int flipped, a, b;

			// horizontal edges never cross a scanline; drop them
			if( contour[ prev ].y == contour[ cur ].y )
				continue;

			// orient the edge so (a) is its starting point; remember whether it was flipped
			flipped = invert ? ( contour[ prev ].y > contour[ cur ].y ) : ( contour[ prev ].y < contour[ cur ].y );
			if( flipped )
			{
				a = prev;
				b = cur;
			}
			else
			{
				a = cur;
				b = prev;
			}

			out = &edges[ num_edges ];
			out->invert = flipped;
			out->x0 = contour[ a ].x * scale_x + shift_x;
			out->y0 = ( contour[ a ].y * y_scale_inv + shift_y ) * vsubsample;
			out->x1 = contour[ b ].x * scale_x + shift_x;
			out->y1 = ( contour[ b ].y * y_scale_inv + shift_y ) * vsubsample;
			++num_edges;
		}
	}

	// order edges by their starting y so the scanline pass can consume them in sequence
	stbtt__sort_edges( edges, num_edges );

	// traverse the scanlines and accumulate coverage
	stbtt__rasterize_sorted_edges( result, edges, num_edges, vsubsample, off_x, off_y, userdata );

	STBTT_free(edges, userdata);
}

// Stores (x, y) at points[n]. A NULL `points` is accepted and ignored: the
// first (counting) pass of stbtt_FlattenCurves runs before the array exists.
static void stbtt__add_point( stbtt__point* points, int n, float x, float y )
{
	if( points )
	{
		stbtt__point* slot = &points[ n ];
		slot->x = x;
		slot->y = y;
	}
}

// Recursively subdivides the quadratic curve (x0,y0)-(x1,y1)-(x2,y2), with
// (x1,y1) as the control point, until the curve's midpoint is within the
// flatness threshold of the chord's midpoint, then emits the segment end
// point. The start point is never emitted; the caller has already added it.
//   points/num_points         - output array (may be NULL while counting) and running count
//   objspace_flatness_squared - squared error tolerance
//   n                         - current recursion depth; recursion stops past depth 16
// Always returns 1.
// @TODO warped to compensate for non-linear stretching
static int stbtt__tesselate_curve( stbtt__point* points, int* num_points, float x0, float y0, float x1, float y1, float x2, float y2, float objspace_flatness_squared, int n )
{
	float mx, my, dx, dy;

	if( n > 16 ) // 65536 segments on one curve better be enough!
		return 1;

	// point on the curve at t = 0.5
	mx = ( x0 + 2 * x1 + x2 ) / 4;
	my = ( y0 + 2 * y1 + y2 ) / 4;
	// offset from that point to the midpoint of the straight chord
	dx = ( x0 + x2 ) / 2 - mx;
	dy = ( y0 + y2 ) / 2 - my;

	if( !( dx * dx + dy * dy > objspace_flatness_squared ) )
	{
		// flat enough: a straight segment to the end point will do
		stbtt__add_point( points, *num_points, x2, y2 );
		++*num_points;
		return 1;
	}

	// too curved: split at the midpoint and handle each half in order
	stbtt__tesselate_curve( points, num_points, x0, y0, ( x0 + x1 ) / 2.0f, ( y0 + y1 ) / 2.0f, mx, my, objspace_flatness_squared, n + 1 );
	stbtt__tesselate_curve( points, num_points, mx, my, ( x1 + x2 ) / 2.0f, ( y1 + y2 ) / 2.0f, x2, y2, objspace_flatness_squared, n + 1 );
	return 1;
}

// Flattens a glyph's vertex list (moves, lines, quadratic curves) into
// straight-line contours.
//   vertices, num_verts - input shape
//   objspace_flatness   - curve tolerance in the vertices' coordinate space
//   contour_lengths     - out: newly allocated array with the point count of each contour
//   num_contours        - out: number of contours (number of STBTT_vmove vertices)
//   userdata            - forwarded to the allocator
// Returns a newly allocated array holding the points of all contours back to
// back. Returns 0 with *num_contours == 0 if there are no contours or an
// allocation fails; in the failure case nothing stays allocated. On success
// both the returned array and *contour_lengths are owned by the caller (see
// stbtt_Rasterize, which releases them with STBTT_free).
static stbtt__point* stbtt_FlattenCurves( stbtt_vertex* vertices, int num_verts, float objspace_flatness, int** contour_lengths, int* num_contours, void* userdata )
{
	stbtt__point* points = 0;
	int num_points = 0;

	float objspace_flatness_squared = objspace_flatness * objspace_flatness;
	int i, n = 0, start = 0, pass;

	// count how many "moves" there are to get the contour count
	for( i = 0; i < num_verts; ++i )
		if( vertices[ i ].type == STBTT_vmove )
			++n;

	*num_contours = n;
	if( n == 0 )
		return 0;

	*contour_lengths = ( int * ) STBTT_malloc(sizeof(**contour_lengths) * n, userdata);

	if( *contour_lengths == 0 )
	{
		*num_contours = 0;
		return 0;
	}

	// make two passes through the points so we don't need to realloc:
	// pass 0 only counts (points is NULL, so stbtt__add_point stores nothing),
	// pass 1 allocates exactly num_points entries and fills them in
	for( pass = 0; pass < 2; ++pass )
	{
		float x = 0, y = 0; // current pen position
		if( pass == 1 )
		{
			points = ( stbtt__point * ) STBTT_malloc(num_points * sizeof(points[0]), userdata);
			if( points == NULL )
				goto error;
		}
		num_points = 0;
		n = -1; // index of the contour being built; -1 until the first move
		for( i = 0; i < num_verts; ++i )
		{
			switch( vertices[ i ].type )
			{
				case STBTT_vmove:
					// start the next contour
					if( n >= 0 )
						( *contour_lengths )[ n ] = num_points - start;
					++n;
					start = num_points;

					x = vertices[ i ].x , y = vertices[ i ].y;
					stbtt__add_point( points, num_points++, x, y );
					break;
				case STBTT_vline:
					x = vertices[ i ].x , y = vertices[ i ].y;
					stbtt__add_point( points, num_points++, x, y );
					break;
				case STBTT_vcurve:
					// emits the subdivided points up to and including the curve's end point
					stbtt__tesselate_curve( points, &num_points, x, y,
					                        vertices[ i ].cx, vertices[ i ].cy,
					                        vertices[ i ].x, vertices[ i ].y,
					                        objspace_flatness_squared, 0 );
					x = vertices[ i ].x , y = vertices[ i ].y;
					break;
			}
		}
		// close out the final contour
		( *contour_lengths )[ n ] = num_points - start;
	}

	return points;
error:
	// points is NULL here (its allocation is what failed); release the lengths array and report failure
	STBTT_free(points, userdata);
	STBTT_free(*contour_lengths, userdata);
	*contour_lengths = 0;
	*num_contours = 0;
	return NULL;
}

// Rasterizes a vertex shape into `result`.
// The curves are first flattened with a tolerance of flatness_in_pixels
// divided by the smaller of the two scales, then the resulting contours are
// scan-converted. Does nothing if flattening yields no points (empty shape
// or allocation failure). Temporary arrays are released before returning.
STBTT_DEF void stbtt_Rasterize( stbtt__bitmap* result, float flatness_in_pixels, stbtt_vertex* vertices, int num_verts, float scale_x, float scale_y, float shift_x, float shift_y, int x_off, int y_off, int invert, void* userdata )
{
	float scale;
	int winding_count, *winding_lengths;
	stbtt__point* windings;

	// use the smaller scale so the tolerance holds along both axes
	scale = scale_x;
	if( scale_x > scale_y )
		scale = scale_y;

	windings = stbtt_FlattenCurves( vertices, num_verts, flatness_in_pixels / scale, &winding_lengths, &winding_count, userdata );
	if( !windings )
		return;

	stbtt__rasterize( result, windings, winding_lengths, winding_count, scale_x, scale_y, shift_x, shift_y, x_off, y_off, invert, userdata );
	STBTT_free(winding_lengths, userdata);
	STBTT_free(windings, userdata);
}

// Releases `bitmap` via STBTT_free, forwarding `userdata` to it.
STBTT_DEF void stbtt_FreeBitmap( unsigned char* bitmap, void* userdata )
{
	STBTT_free(bitmap, userdata);
}

// Allocates and renders a one-byte-per-pixel bitmap for `glyph`.
//   scale_x, scale_y - glyph scale; if exactly one is 0 it takes the other's
//                      value, if both are 0 the function returns NULL
//   shift_x, shift_y - subpixel shift
//   width, height    - optional outputs: bitmap dimensions
//   xoff, yoff       - optional outputs: left/top of the glyph's pixel box
// Returns the pixel buffer (allocated with STBTT_malloc using info->userdata),
// or NULL if the box is empty or an allocation fails.
STBTT_DEF unsigned char* stbtt_GetGlyphBitmapSubpixel( const stbtt_fontinfo* info, float scale_x, float scale_y, float shift_x, float shift_y, int glyph, int* width, int* height, int* xoff, int* yoff )
{
	int left, top, right, bottom;
	stbtt__bitmap bitmap;
	stbtt_vertex* shape;
	int shape_len = stbtt_GetGlyphShape( info, glyph, &shape );

	// no usable scale at all: clean up and give up
	if( scale_x == 0 && scale_y == 0 )
	{
		STBTT_free(shape, info->userdata);
		return NULL;
	}
	// a single missing scale mirrors the other one
	if( scale_x == 0 )
		scale_x = scale_y;
	else if( scale_y == 0 )
		scale_y = scale_x;

	stbtt_GetGlyphBitmapBoxSubpixel( info, glyph, scale_x, scale_y, shift_x, shift_y, &left, &top, &right, &bottom );

	// bitmap dimensions follow from the pixel box
	bitmap.w = ( right - left );
	bitmap.h = ( bottom - top );
	bitmap.pixels = NULL; // stays NULL on any failure below

	if( width )
		*width = bitmap.w;
	if( height )
		*height = bitmap.h;
	if( xoff )
		*xoff = left;
	if( yoff )
		*yoff = top;

	if( bitmap.w && bitmap.h )
	{
		bitmap.pixels = ( unsigned char * ) STBTT_malloc(bitmap.w * bitmap.h, info->userdata);
		if( bitmap.pixels )
		{
			// tightly packed rows
			bitmap.stride = bitmap.w;
			stbtt_Rasterize( &bitmap, 0.35f, shape, shape_len, scale_x, scale_y, shift_x, shift_y, left, top, 1, info->userdata );
		}
	}

	STBTT_free(shape, info->userdata);
	return bitmap.pixels;
}

// Same as stbtt_GetGlyphBitmapSubpixel with both subpixel shifts fixed at 0.
STBTT_DEF unsigned char* stbtt_GetGlyphBitmap( const stbtt_fontinfo* info, float scale_x, float scale_y, int glyph, int* width, int* height, int* xoff, int* yoff )
{
	return stbtt_GetGlyphBitmapSubpixel( info, scale_x, scale_y, 0.0f, 0.0f, glyph, width, height, xoff, yoff );
}

// Renders `glyph` into a caller-provided buffer.
//   output            - destination pixels
//   out_w, out_h      - size of the destination region; nothing is drawn if either is 0
//   out_stride        - distance between destination rows
//   scale_*, shift_*  - glyph scale and subpixel shift
// The destination's origin is aligned with the left/top of the glyph's pixel
// box as computed by stbtt_GetGlyphBitmapBoxSubpixel.
STBTT_DEF void stbtt_MakeGlyphBitmapSubpixel( const stbtt_fontinfo* info, unsigned char* output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int glyph )
{
	int left, top;
	stbtt_vertex* shape;
	int shape_len = stbtt_GetGlyphShape( info, glyph, &shape );
	stbtt__bitmap target;

	// only the top-left corner of the box is needed here
	stbtt_GetGlyphBitmapBoxSubpixel( info, glyph, scale_x, scale_y, shift_x, shift_y, &left, &top, 0, 0 );

	target.w = out_w;
	target.h = out_h;
	target.stride = out_stride;
	target.pixels = output;

	if( target.w && target.h )
	{
		stbtt_Rasterize( &target, 0.35f, shape, shape_len, scale_x, scale_y, shift_x, shift_y, left, top, 1, info->userdata );
	}

	STBTT_free(shape, info->userdata);
}

// Same as stbtt_MakeGlyphBitmapSubpixel with both subpixel shifts fixed at 0.
STBTT_DEF void stbtt_MakeGlyphBitmap( const stbtt_fontinfo* info, unsigned char* output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int glyph )
{
	stbtt_MakeGlyphBitmapSubpixel( info, output, out_w, out_h, out_stride, scale_x, scale_y, 0.0f, 0.0f, glyph );
}

// Codepoint flavour of stbtt_GetGlyphBitmapSubpixel: looks up the glyph index
// for `codepoint` and forwards every other argument unchanged.
STBTT_DEF unsigned char* stbtt_GetCodepointBitmapSubpixel( const stbtt_fontinfo* info, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint, int* width, int* height, int* xoff, int* yoff )
{
	int glyph_index = stbtt_FindGlyphIndex( info, codepoint );
	return stbtt_GetGlyphBitmapSubpixel( info, scale_x, scale_y, shift_x, shift_y, glyph_index, width, height, xoff, yoff );
}

// Codepoint flavour of stbtt_MakeGlyphBitmapSubpixel: looks up the glyph
// index for `codepoint` and forwards every other argument unchanged.
STBTT_DEF void stbtt_MakeCodepointBitmapSubpixel( const stbtt_fontinfo* info, unsigned char* output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint )
{
	int glyph_index = stbtt_FindGlyphIndex( info, codepoint );
	stbtt_MakeGlyphBitmapSubpixel( info, output, out_w, out_h, out_stride, scale_x, scale_y, shift_x, shift_y, glyph_index );
}

// Same as stbtt_GetCodepointBitmapSubpixel with both subpixel shifts fixed
// at 0.
STBTT_DEF unsigned char* stbtt_GetCodepointBitmap( const stbtt_fontinfo* info, float scale_x, float scale_y, int codepoint, int* width, int* height, int* xoff, int* yoff )
{
	return stbtt_GetCodepointBitmapSubpixel( info, scale_x, scale_y, 0.0f, 0.0f, codepoint, width, height, xoff, yoff );
}

// Same as stbtt_MakeCodepointBitmapSubpixel with both subpixel shifts fixed
// at 0.
STBTT_DEF void stbtt_MakeCodepointBitmap( const stbtt_fontinfo* info, unsigned char* output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int codepoint )
{
	stbtt_MakeCodepointBitmapSubpixel( info, output, out_w, out_h, out_stride, scale_x, scale_y, 0.0f, 0.0f, codepoint );
}

//////////////////////////////////////////////////////////////////////////////
//
// bitmap baking
//
// This is SUPER-CRAPPY packing to keep source code small

// Bakes `num_chars` consecutive characters starting at `first_char` into the
// pw x ph one-channel bitmap `pixels`, filling rows left to right and starting
// a new row when the current one is full. One entry of `chardata` is written
// per baked character.
//
//   data, offset   font location (use offset=0 for plain .ttf)
//   pixel_height   height of font in pixels
//   pixels, pw, ph bitmap to be filled in (cleared to 0 first)
//
// Returns -1 if the font cannot be initialised, the negated number of
// characters baked if the bitmap runs out of rows, and otherwise the first
// row below everything that was baked.
STBTT_DEF int stbtt_BakeFontBitmap(const unsigned char *data, int offset,
                                   float pixel_height,
                                   unsigned char *pixels, int pw, int ph,
                                   int first_char, int num_chars,
                                   stbtt_bakedchar *chardata)
{
	stbtt_fontinfo font;
	float scale;
	int x = 1, y = 1;       // top-left of the next glyph cell
	int row_bottom = 1;     // first row below every glyph placed so far
	int ch;

	font.userdata = NULL;
	if (stbtt_InitFont(&font, data, offset) == 0)
		return -1;

	STBTT_memset(pixels, 0, pw * ph); // background of 0 around pixels
	scale = stbtt_ScaleForPixelHeight(&font, pixel_height);

	for (ch = 0; ch < num_chars; ++ch) {
		stbtt_bakedchar *out;
		int advance, lsb, x0, y0, x1, y1, gw, gh;
		int glyph = stbtt_FindGlyphIndex(&font, first_char + ch);

		stbtt_GetGlyphHMetrics(&font, glyph, &advance, &lsb);
		stbtt_GetGlyphBitmapBox(&font, glyph, scale, scale, &x0, &y0, &x1, &y1);
		gw = x1 - x0;
		gh = y1 - y0;

		// advance to next row when the glyph does not fit in this one
		if (x + gw + 1 >= pw) {
			y = row_bottom;
			x = 1;
		}
		// check if it fits vertically AFTER potentially moving to next row
		if (y + gh + 1 >= ph)
			return -ch;
		STBTT_assert(x+gw < pw);
		STBTT_assert(y+gh < ph);

		stbtt_MakeGlyphBitmap(&font, pixels + x + y * pw, gw, gh, pw, scale, scale, glyph);

		out = &chardata[ch];
		out->x0 = (stbtt_int16) x;
		out->y0 = (stbtt_int16) y;
		out->x1 = (stbtt_int16) (x + gw);
		out->y1 = (stbtt_int16) (y + gh);
		out->xadvance = scale * advance;
		out->xoff = (float) x0;
		out->yoff = (float) y0;

		// leave a one-pixel gap to the right of and below each glyph
		x = x + gw + 1;
		if (row_bottom < y + gh + 1)
			row_bottom = y + gh + 1;
	}
	return row_bottom;
}

// Fills `q` with the screen rectangle and texture coordinates for baked
// character `char_index`, positioned relative to (*xpos, *ypos), then advances
// *xpos by the character's xadvance. `pw`/`ph` are the bitmap dimensions used
// to normalise the texture coordinates. The quad's top-left corner is rounded
// to the nearest integer; when `opengl_fillrule` is zero every screen
// coordinate is additionally shifted by -0.5.
STBTT_DEF void stbtt_GetBakedQuad(stbtt_bakedchar *chardata, int pw, int ph, int char_index, float *xpos, float *ypos, stbtt_aligned_quad *q, int opengl_fillrule)
{
	stbtt_bakedchar *glyph = &chardata[char_index];
	float inv_w = 1.0f / pw;
	float inv_h = 1.0f / ph;
	float bias;
	int left, top;

	if (opengl_fillrule)
		bias = 0;
	else
		bias = -0.5f;

	left = STBTT_ifloor((*xpos + glyph->xoff) + 0.5f);
	top = STBTT_ifloor((*ypos + glyph->yoff) + 0.5f);

	// screen-space rectangle
	q->x0 = left + bias;
	q->y0 = top + bias;
	q->x1 = left + glyph->x1 - glyph->x0 + bias;
	q->y1 = top + glyph->y1 - glyph->y0 + bias;

	// normalised texture rectangle
	q->s0 = glyph->x0 * inv_w;
	q->t0 = glyph->y0 * inv_h;
	q->s1 = glyph->x1 * inv_w;
	q->t1 = glyph->y1 * inv_h;

	*xpos += glyph->xadvance;
}

//////////////////////////////////////////////////////////////////////////////
//
// rectangle packing replacement routines if you don't have stb_rect_pack.h
//

#ifndef STB_RECT_PACK_VERSION
// STBTT__NOTUSED(v) mentions `v` in a void expression; it is applied to the
// parameters that stbrp_init_target() below does not use. The non-MSVC form
// wraps `v` in sizeof, so `v` itself is not evaluated.
#ifdef _MSC_VER
#define STBTT__NOTUSED(v)  (void)(v)
#else
#define STBTT__NOTUSED(v)  (void)sizeof(v)
#endif

// Coordinate type of the x/y position fields of struct stbrp_rect below.
typedef int stbrp_coord;

////////////////////////////////////////////////////////////////////////////////////
//                                                                                //
//                                                                                //
// COMPILER WARNING ?!?!?                                                         //
//                                                                                //
//                                                                                //
// if you get a compile warning due to these symbols being defined more than      //
// once, move #include "stb_rect_pack.h" before #include "stb_truetype.h"         //
//                                                                                //
////////////////////////////////////////////////////////////////////////////////////

// State of the fallback row packer used when stb_rect_pack.h is not included.
typedef struct
{
   int width;     // usable target width, set by stbrp_init_target
   int height;    // usable target height, set by stbrp_init_target
   int x;         // x position at which the next rect will be placed
   int y;         // top of the row currently being filled
   int bottom_y;  // lowest bottom edge of any rect placed so far; becomes y of the next row
} stbrp_context;

// Node type of the fallback packer. stbrp_init_target() in this fallback
// ignores the node array it receives, so the contents are never read here;
// presumably the type exists only so callers have something to allocate.
typedef struct
{
   unsigned char x;
} stbrp_node;

// One rectangle handed to the fallback packer.
struct stbrp_rect
{
   stbrp_coord x;   // output: left edge assigned by stbrp_pack_rects
   stbrp_coord y;   // output: top edge assigned by stbrp_pack_rects
   int id;          // not touched by the fallback packer functions below
   int w;           // input: rect width
   int h;           // input: rect height
   int was_packed;  // output: 1 if a position was assigned, 0 otherwise
};

// Resets the fallback packer `con` to an empty pw x ph target. The node array
// is part of the signature but is not used by this implementation.
static void stbrp_init_target(stbrp_context *con, int pw, int ph, stbrp_node *nodes, int num_nodes)
{
   STBTT__NOTUSED(nodes);
   STBTT__NOTUSED(num_nodes);

   // packing starts in the top-left corner with nothing placed yet
   con->x = con->y = con->bottom_y = 0;
   con->width = pw;
   con->height = ph;
}

// Places `rects` left to right in rows, in array order, continuing from the
// position stored in `con`. Each placed rect gets its x/y filled in and
// was_packed set to 1. Packing stops at the first rect that cannot be placed
// (too wide for the target, or no vertical room left); that rect and every
// rect after it get was_packed = 0.
static void stbrp_pack_rects(stbrp_context *con, stbrp_rect *rects, int num_rects)
{
   int i;
   for (i=0; i < num_rects; ++i) {
      // a rect wider than the whole target cannot fit even at x=0; without this
      // check it would be placed at the start of a row and reported as packed
      // although it extends past the right edge
      if (rects[i].w > con->width)
         break;
      // start a new row when the rect does not fit in the rest of this one
      if (con->x + rects[i].w > con->width) {
         con->x = 0;
         con->y = con->bottom_y;
      }
      // out of vertical room
      if (con->y + rects[i].h > con->height)
         break;
      rects[i].x = con->x;
      rects[i].y = con->y;
      rects[i].was_packed = 1;
      con->x += rects[i].w;
      // remember the lowest bottom edge so the next row starts below it
      if (con->y + rects[i].h > con->bottom_y)
         con->bottom_y = con->y + rects[i].h;
   }
   for (   ; i < num_rects; ++i)
      rects[i].was_packed = 0;
}
#endif

//////////////////////////////////////////////////////////////////////////////
//
// bitmap baking
//
// This is SUPER-AWESOME (tm Ryan Gordon) packing using stb_rect_pack.h. If
// stb_rect_pack.h isn't available, it uses the BakeFontBitmap strategy.

// Initialises the packing context `spc` for a pw x ph target bitmap.
// Allocates the rect-packer state and its node array through STBTT_malloc
// with `alloc_context`, records the target parameters in `spc`, resets
// oversampling to 1x1, and clears `pixels` to 0 when it is non-NULL.
// A `stride_in_bytes` of 0 means "use pw". Returns 1 on success and 0 if
// either allocation fails (anything already allocated is released).
STBTT_DEF int stbtt_PackBegin(stbtt_pack_context *spc, unsigned char *pixels, int pw, int ph, int stride_in_bytes, int padding, void *alloc_context)
{
	stbrp_context *packer = (stbrp_context *) STBTT_malloc(sizeof(*packer), alloc_context);
	int node_count = pw - padding;
	stbrp_node *node_storage = (stbrp_node *) STBTT_malloc(sizeof(*node_storage) * node_count, alloc_context);

	if (!packer || !node_storage) {
		if (packer)
			STBTT_free(packer, alloc_context);
		if (node_storage)
			STBTT_free(node_storage, alloc_context);
		return 0;
	}

	spc->user_allocator_context = alloc_context;
	spc->pack_info = packer;
	spc->nodes = node_storage;
	spc->pixels = pixels;
	spc->width = pw;
	spc->height = ph;
	spc->padding = padding;
	if (stride_in_bytes != 0)
		spc->stride_in_bytes = stride_in_bytes;
	else
		spc->stride_in_bytes = pw;
	spc->h_oversample = 1;
	spc->v_oversample = 1;

	// the packer works on a target shrunk by `padding` in both directions
	stbrp_init_target(packer, pw - padding, ph - padding, node_storage, node_count);

	if (pixels)
		STBTT_memset(pixels, 0, pw * ph); // background of 0 around pixels

	return 1;
}

// Releases the node array and packer state that stbtt_PackBegin stored in
// `spc`, using the allocator context recorded there.
STBTT_DEF void stbtt_PackEnd(stbtt_pack_context *spc)
{
	void *alloc_context = spc->user_allocator_context;

	STBTT_free(spc->nodes, alloc_context);
	STBTT_free(spc->pack_info, alloc_context);
}

// Sets the horizontal and vertical oversampling factors stored in `spc`.
// A factor larger than STBTT_MAX_OVERSAMPLE trips an assert and, if asserts
// are compiled out, is ignored so the previous value stays in effect.
STBTT_DEF void stbtt_PackSetOversampling(stbtt_pack_context *spc, unsigned int h_oversample, unsigned int v_oversample)
{
	int h_in_range = h_oversample <= STBTT_MAX_OVERSAMPLE;
	int v_in_range = v_oversample <= STBTT_MAX_OVERSAMPLE;

	STBTT_assert(h_oversample <= STBTT_MAX_OVERSAMPLE);
	STBTT_assert(v_oversample <= STBTT_MAX_OVERSAMPLE);

	if (h_in_range)
		spc->h_oversample = h_oversample;
	if (v_in_range)
		spc->v_oversample = v_oversample;
}

// Mask applied to indices into the STBTT_MAX_OVERSAMPLE-entry ring buffers of
// the prefilters below.
// NOTE(review): this only wraps like a modulo if STBTT_MAX_OVERSAMPLE is a
// power of two -- confirm where that is enforced.
#define STBTT__OVER_MASK  (STBTT_MAX_OVERSAMPLE-1)

// In-place horizontal box filter of width `kernel_width` over a w x h region
// whose rows are `stride_in_bytes` apart. Each output pixel is the running sum
// of the last `kernel_width` input pixels of its row divided by
// `kernel_width`; the inputs are remembered in a small ring buffer because the
// row is overwritten as it is read. The last kernel_width-1 pixels of each row
// are expected to be 0 on input (asserted) and only drain the running sum.
static void stbtt__h_prefilter(unsigned char *pixels, int w, int h, int stride_in_bytes, unsigned int kernel_width)
{
	unsigned char history[STBTT_MAX_OVERSAMPLE];
	int last_full = w - kernel_width;   // last index that still reads a new input pixel
	int row;

	STBTT_memset(history, 0, STBTT_MAX_OVERSAMPLE); // suppress bogus warning from VS2013 -analyze

	for (row = 0; row < h; ++row, pixels += stride_in_bytes) {
		unsigned int sum = 0;
		unsigned char src;
		int i = 0;

		STBTT_memset(history, 0, kernel_width);

		// make kernel_width a constant in common cases so compiler can optimize out the divide
		switch (kernel_width) {
			case 2:
				while (i <= last_full) {
					src = pixels[i];
					sum += src - history[i & STBTT__OVER_MASK];
					history[(i + kernel_width) & STBTT__OVER_MASK] = src;
					pixels[i] = (unsigned char) (sum / 2);
					++i;
				}
				break;
			case 3:
				while (i <= last_full) {
					src = pixels[i];
					sum += src - history[i & STBTT__OVER_MASK];
					history[(i + kernel_width) & STBTT__OVER_MASK] = src;
					pixels[i] = (unsigned char) (sum / 3);
					++i;
				}
				break;
			case 4:
				while (i <= last_full) {
					src = pixels[i];
					sum += src - history[i & STBTT__OVER_MASK];
					history[(i + kernel_width) & STBTT__OVER_MASK] = src;
					pixels[i] = (unsigned char) (sum / 4);
					++i;
				}
				break;
			case 5:
				while (i <= last_full) {
					src = pixels[i];
					sum += src - history[i & STBTT__OVER_MASK];
					history[(i + kernel_width) & STBTT__OVER_MASK] = src;
					pixels[i] = (unsigned char) (sum / 5);
					++i;
				}
				break;
			default:
				while (i <= last_full) {
					src = pixels[i];
					sum += src - history[i & STBTT__OVER_MASK];
					history[(i + kernel_width) & STBTT__OVER_MASK] = src;
					pixels[i] = (unsigned char) (sum / kernel_width);
					++i;
				}
				break;
		}

		// tail of the row: no new input, just retire the remembered pixels
		while (i < w) {
			STBTT_assert(pixels[i] == 0);
			sum -= history[i & STBTT__OVER_MASK];
			pixels[i] = (unsigned char) (sum / kernel_width);
			++i;
		}
	}
}

// In-place vertical box filter of width `kernel_width` over a w x h region
// whose rows are `stride_in_bytes` apart. Works column by column: each output
// pixel is the running sum of the last `kernel_width` input pixels of its
// column divided by `kernel_width`, with the inputs remembered in a small ring
// buffer. The last kernel_width-1 pixels of each column are expected to be 0
// on input (asserted) and only drain the running sum.
static void stbtt__v_prefilter(unsigned char *pixels, int w, int h, int stride_in_bytes, unsigned int kernel_width)
{
	unsigned char history[STBTT_MAX_OVERSAMPLE];
	int last_full = h - kernel_width;   // last index that still reads a new input pixel
	int column;

	STBTT_memset(history, 0, STBTT_MAX_OVERSAMPLE); // suppress bogus warning from VS2013 -analyze

	for (column = 0; column < w; ++column, pixels += 1) {
		unsigned int sum = 0;
		unsigned char *cell;
		unsigned char src;
		int i = 0;

		STBTT_memset(history, 0, kernel_width);

		// make kernel_width a constant in common cases so compiler can optimize out the divide
		switch (kernel_width) {
			case 2:
				while (i <= last_full) {
					cell = &pixels[i * stride_in_bytes];
					src = *cell;
					sum += src - history[i & STBTT__OVER_MASK];
					history[(i + kernel_width) & STBTT__OVER_MASK] = src;
					*cell = (unsigned char) (sum / 2);
					++i;
				}
				break;
			case 3:
				while (i <= last_full) {
					cell = &pixels[i * stride_in_bytes];
					src = *cell;
					sum += src - history[i & STBTT__OVER_MASK];
					history[(i + kernel_width) & STBTT__OVER_MASK] = src;
					*cell = (unsigned char) (sum / 3);
					++i;
				}
				break;
			case 4:
				while (i <= last_full) {
					cell = &pixels[i * stride_in_bytes];
					src = *cell;
					sum += src - history[i & STBTT__OVER_MASK];
					history[(i + kernel_width) & STBTT__OVER_MASK] = src;
					*cell = (unsigned char) (sum / 4);
					++i;
				}
				break;
			case 5:
				while (i <= last_full) {
					cell = &pixels[i * stride_in_bytes];
					src = *cell;
					sum += src - history[i & STBTT__OVER_MASK];
					history[(i + kernel_width) & STBTT__OVER_MASK] = src;
					*cell = (unsigned char) (sum / 5);
					++i;
				}
				break;
			default:
				while (i <= last_full) {
					cell = &pixels[i * stride_in_bytes];
					src = *cell;
					sum += src - history[i & STBTT__OVER_MASK];
					history[(i + kernel_width) & STBTT__OVER_MASK] = src;
					*cell = (unsigned char) (sum / kernel_width);
					++i;
				}
				break;
		}

		// tail of the column: no new input, just retire the remembered pixels
		while (i < h) {
			STBTT_assert(pixels[i*stride_in_bytes] == 0);
			sum -= history[i & STBTT__OVER_MASK];
			pixels[i * stride_in_bytes] = (unsigned char) (sum / kernel_width);
			++i;
		}
	}
}

// Returns the offset that compensates for the phase shift of the box
// prefilter: -(oversample - 1) / (2 * oversample), or 0 when `oversample`
// is 0.
static float stbtt__oversample_shift(int oversample)
{
	float numerator, denominator;

	if (oversample == 0)
		return 0.0f;

	// A box filter of width "oversample" moves the image by
	// (oversample - 1)/2 pixels in oversampled space; the value returned
	// here is the same distance in the opposite direction, expressed in
	// non-oversampled pixels.
	numerator = (float) -(oversample - 1);
	denominator = 2.0f * (float) oversample;
	return numerator / denominator;
}

// Fills in the width and height of one rect per character of every range, in
// range order, and returns the number of rects written. Each size is the
// glyph's bitmap box at the oversampled scale, grown by the context's padding
// and by (oversample - 1) in each direction. The current oversampling factors
// of `spc` are also recorded in each range.
// rects array must be big enough to accommodate all characters in the given ranges
STBTT_DEF int stbtt_PackFontRangesGatherRects(stbtt_pack_context *spc, stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects)
{
	int range_index, char_index;
	int rect_count = 0;

	for (range_index = 0; range_index < num_ranges; ++range_index) {
		stbtt_pack_range *range = &ranges[range_index];
		float fh = range->font_size;
		float scale;

		// positive sizes are pixel heights, others are negated em-to-pixel sizes
		if (fh > 0)
			scale = stbtt_ScaleForPixelHeight(info, fh);
		else
			scale = stbtt_ScaleForMappingEmToPixels(info, -fh);

		range->h_oversample = (unsigned char) spc->h_oversample;
		range->v_oversample = (unsigned char) spc->v_oversample;

		for (char_index = 0; char_index < range->num_chars; ++char_index) {
			stbrp_rect *rect = &rects[rect_count];
			int x0, y0, x1, y1;
			int codepoint, glyph;

			// codepoints come either from an explicit array or from a contiguous run
			if (range->array_of_unicode_codepoints == NULL)
				codepoint = range->first_unicode_codepoint_in_range + char_index;
			else
				codepoint = range->array_of_unicode_codepoints[char_index];

			glyph = stbtt_FindGlyphIndex(info, codepoint);
			stbtt_GetGlyphBitmapBoxSubpixel(info, glyph,
			                                scale * spc->h_oversample,
			                                scale * spc->v_oversample,
			                                0, 0,
			                                &x0, &y0, &x1, &y1);
			rect->w = (stbrp_coord) (x1 - x0 + spc->padding + spc->h_oversample - 1);
			rect->h = (stbrp_coord) (y1 - y0 + spc->padding + spc->v_oversample - 1);
			++rect_count;
		}
	}

	return rect_count;
}

// Renders every character whose rect was packed into the context's bitmap and
// fills in its chardata entry; `rects` is walked in the same order in which
// stbtt_PackFontRangesGatherRects produced it. Each range is rendered with the
// oversampling factors stored in that range; the context's own factors are
// restored before returning. Packed rects are adjusted in place to exclude
// the padding. Returns 1 if every character was packed, 0 if any was not.
// rects array must be big enough to accommodate all characters in the given ranges
STBTT_DEF int stbtt_PackFontRangesRenderIntoRects(stbtt_pack_context *spc, stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects)
{
	int range_index, char_index;
	int rect_index = 0;
	int all_packed = 1;

	// save current values
	int saved_h_over = spc->h_oversample;
	int saved_v_over = spc->v_oversample;

	for (range_index = 0; range_index < num_ranges; ++range_index) {
		stbtt_pack_range *range = &ranges[range_index];
		float fh = range->font_size;
		float scale, recip_h, recip_v, sub_x, sub_y;

		if (fh > 0)
			scale = stbtt_ScaleForPixelHeight(info, fh);
		else
			scale = stbtt_ScaleForMappingEmToPixels(info, -fh);

		spc->h_oversample = range->h_oversample;
		spc->v_oversample = range->v_oversample;
		recip_h = 1.0f / spc->h_oversample;
		recip_v = 1.0f / spc->v_oversample;
		sub_x = stbtt__oversample_shift(spc->h_oversample);
		sub_y = stbtt__oversample_shift(spc->v_oversample);

		for (char_index = 0; char_index < range->num_chars; ++char_index, ++rect_index) {
			stbrp_rect *r = &rects[rect_index];
			stbtt_packedchar *bc;
			unsigned char *dest;
			int advance, lsb, x0, y0, x1, y1;
			int codepoint, glyph;
			stbrp_coord pad;

			if (!r->was_packed) {
				all_packed = 0; // if any fail, report failure
				continue;
			}

			bc = &range->chardata_for_range[char_index];
			if (range->array_of_unicode_codepoints == NULL)
				codepoint = range->first_unicode_codepoint_in_range + char_index;
			else
				codepoint = range->array_of_unicode_codepoints[char_index];
			glyph = stbtt_FindGlyphIndex(info, codepoint);
			pad = (stbrp_coord) spc->padding;

			// pad on left and top
			r->x += pad;
			r->y += pad;
			r->w -= pad;
			r->h -= pad;

			stbtt_GetGlyphHMetrics(info, glyph, &advance, &lsb);
			stbtt_GetGlyphBitmapBox(info, glyph,
			                        scale * spc->h_oversample,
			                        scale * spc->v_oversample,
			                        &x0, &y0, &x1, &y1);

			// the glyph is drawn without the extra oversample-1 columns/rows;
			// the prefilters below then spread it over the full rect
			dest = spc->pixels + r->x + r->y * spc->stride_in_bytes;
			stbtt_MakeGlyphBitmapSubpixel(info,
			                              dest,
			                              r->w - spc->h_oversample + 1,
			                              r->h - spc->v_oversample + 1,
			                              spc->stride_in_bytes,
			                              scale * spc->h_oversample,
			                              scale * spc->v_oversample,
			                              0, 0,
			                              glyph);

			if (spc->h_oversample > 1)
				stbtt__h_prefilter(dest, r->w, r->h, spc->stride_in_bytes, spc->h_oversample);

			if (spc->v_oversample > 1)
				stbtt__v_prefilter(dest, r->w, r->h, spc->stride_in_bytes, spc->v_oversample);

			bc->x0 = (stbtt_int16) r->x;
			bc->y0 = (stbtt_int16) r->y;
			bc->x1 = (stbtt_int16) (r->x + r->w);
			bc->y1 = (stbtt_int16) (r->y + r->h);
			bc->xadvance = scale * advance;
			bc->xoff = (float) x0 * recip_h + sub_x;
			bc->yoff = (float) y0 * recip_v + sub_y;
			bc->xoff2 = (x0 + r->w) * recip_h + sub_x;
			bc->yoff2 = (y0 + r->h) * recip_v + sub_y;
		}
	}

	// restore original values
	spc->h_oversample = saved_h_over;
	spc->v_oversample = saved_v_over;

	return all_packed;
}

// Runs the rect packer stored in `spc` over `rects`, which assigns positions
// and the was_packed flag to each of the `num_rects` entries.
STBTT_DEF void stbtt_PackFontRangesPackRects(stbtt_pack_context *spc, stbrp_rect *rects, int num_rects)
{
	stbrp_context *packer = (stbrp_context *) spc->pack_info;

	stbrp_pack_rects(packer, rects, num_rects);
}

// Packs and renders all characters of `ranges` from font number `font_index`
// inside `fontdata` into the bitmap of `spc`: gathers one rect per character,
// packs the rects, then renders each packed glyph and fills in its chardata.
// Every chardata entry has x0/y0/x1/y1 zeroed first, so characters that do
// not get packed are left with an empty rectangle.
// Returns 1 if every character was packed and rendered; returns 0 if any
// character did not fit, if the temporary rect array cannot be allocated, or
// if `font_index` is out of range or the font cannot be initialised.
STBTT_DEF int stbtt_PackFontRanges( stbtt_pack_context* spc, unsigned char* fontdata, int font_index, stbtt_pack_range* ranges, int num_ranges )
{
	stbtt_fontinfo info;
	int i, j, n, font_offset, return_value = 1;
	stbrp_rect* rects;

	// flag all characters as NOT packed
	for( i = 0; i < num_ranges; ++i )
		for( j = 0; j < ranges[ i ].num_chars; ++j )
			ranges[ i ].chardata_for_range[ j ].x0 =
					ranges[ i ].chardata_for_range[ j ].y0 =
					ranges[ i ].chardata_for_range[ j ].x1 =
					ranges[ i ].chardata_for_range[ j ].y1 = 0;

	// A negative offset means font_index does not exist, and stbtt_InitFont
	// returns 0 when it cannot set up the font. Continuing in either case
	// would gather and render glyphs from an uninitialised fontinfo, so
	// report failure instead.
	font_offset = stbtt_GetFontOffsetForIndex( fontdata, font_index );
	if( font_offset < 0 )
		return 0;

	info.userdata = spc->user_allocator_context;
	if( !stbtt_InitFont( &info, fontdata, font_offset ) )
		return 0;

	n = 0;
	for( i = 0; i < num_ranges; ++i )
		n += ranges[ i ].num_chars;

	rects = ( stbrp_rect * ) STBTT_malloc(sizeof(*rects) * n, spc->user_allocator_context);
	if( rects == NULL )
		return 0;

	n = stbtt_PackFontRangesGatherRects( spc, &info, ranges, num_ranges, rects );

	stbtt_PackFontRangesPackRects( spc, rects, n );

	return_value = stbtt_PackFontRangesRenderIntoRects( spc, &info, ranges, num_ranges, rects );

	STBTT_free(rects, spc->user_allocator_context);
	return return_value;
}

// Convenience wrapper around stbtt_PackFontRanges for one contiguous run of
// `num_chars_in_range` codepoints starting at
// `first_unicode_codepoint_in_range`, rendered at `font_size`. Returns what
// stbtt_PackFontRanges returns.
STBTT_DEF int stbtt_PackFontRange(stbtt_pack_context *spc, unsigned char *fontdata, int font_index, float font_size,
                                  int first_unicode_codepoint_in_range, int num_chars_in_range, stbtt_packedchar *chardata_for_range)
{
	stbtt_pack_range single;

	single.font_size = font_size;
	single.first_unicode_codepoint_in_range = first_unicode_codepoint_in_range;
	single.array_of_unicode_codepoints = NULL; // contiguous run, no explicit list
	single.num_chars = num_chars_in_range;
	single.chardata_for_range = chardata_for_range;

	return stbtt_PackFontRanges(spc, fontdata, font_index, &single, 1);
}

// Fills `q` with the screen rectangle and texture coordinates for packed
// character `char_index`, positioned relative to (*xpos, *ypos), then advances
// *xpos by the character's xadvance. `pw`/`ph` are the bitmap dimensions used
// to normalise the texture coordinates. With `align_to_integer` set, the
// quad's top-left corner is rounded to the nearest integer and its size is
// kept; otherwise the stored offsets are applied unrounded.
STBTT_DEF void stbtt_GetPackedQuad(stbtt_packedchar *chardata, int pw, int ph, int char_index, float *xpos, float *ypos, stbtt_aligned_quad *q, int align_to_integer)
{
	stbtt_packedchar *glyph = &chardata[char_index];
	float inv_w = 1.0f / pw;
	float inv_h = 1.0f / ph;

	if (!align_to_integer) {
		q->x0 = *xpos + glyph->xoff;
		q->y0 = *ypos + glyph->yoff;
		q->x1 = *xpos + glyph->xoff2;
		q->y1 = *ypos + glyph->yoff2;
	} else {
		float x = (float) STBTT_ifloor((*xpos + glyph->xoff) + 0.5f);
		float y = (float) STBTT_ifloor((*ypos + glyph->yoff) + 0.5f);
		q->x0 = x;
		q->y0 = y;
		q->x1 = x + glyph->xoff2 - glyph->xoff;
		q->y1 = y + glyph->yoff2 - glyph->yoff;
	}

	// normalised texture rectangle
	q->s0 = glyph->x0 * inv_w;
	q->t0 = glyph->y0 * inv_h;
	q->s1 = glyph->x1 * inv_w;
	q->t1 = glyph->y1 * inv_h;

	*xpos += glyph->xadvance;
}

//////////////////////////////////////////////////////////////////////////////
//
// font name matching -- recommended not to use this
//

// check if a utf8 string contains a prefix which is the utf16 string; if so return length of matching utf8 string
//
//   s1, len1   UTF-8 string and its length in bytes
//   s2, len2   big-endian UTF-16 string and its length in bytes
//
// Returns the number of bytes of s1 that encode exactly the characters of s2,
// or -1 if s1 does not start with s2. Malformed UTF-16 (a length that is
// negative or odd, a lone low surrogate, or a high surrogate cut off at the
// end of s2) never matches and yields -1.
static stbtt_int32 stbtt__CompareUTF8toUTF16_bigendian_prefix( const stbtt_uint8* s1, stbtt_int32 len1, const stbtt_uint8* s2, stbtt_int32 len2 )
{
	stbtt_int32 i = 0;

	// UTF-16 consists of whole 2-byte units. With any other length the loop
	// below could never reach len2 == 0, so the result could only be -1, but
	// only after reading past the end of s2; reject such input up front.
	if( len2 < 0 || ( len2 & 1 ) )
		return -1;

	// convert utf16 to utf8 and compare the results while converting
	while( len2 )
	{
		stbtt_uint16 ch = s2[ 0 ] * 256 + s2[ 1 ];
		if( ch < 0x80 )
		{
			// one UTF-8 byte
			if( i >= len1 )
				return -1;
			if( s1[ i++ ] != ch )
				return -1;
		}
		else if( ch < 0x800 )
		{
			// two UTF-8 bytes
			if( i + 1 >= len1 )
				return -1;
			if( s1[ i++ ] != 0xc0 + ( ch >> 6 ) )
				return -1;
			if( s1[ i++ ] != 0x80 + ( ch & 0x3f ) )
				return -1;
		}
		else if( ch >= 0xd800 && ch < 0xdc00 )
		{
			// high surrogate: combines with the following unit into one
			// codepoint that takes four UTF-8 bytes
			stbtt_uint32 c;
			stbtt_uint16 ch2;
			// the second unit must actually be inside s2
			if( len2 < 4 )
				return -1;
			ch2 = s2[ 2 ] * 256 + s2[ 3 ];
			if( i + 3 >= len1 )
				return -1;
			c = ( ( ch - 0xd800 ) << 10 ) + ( ch2 - 0xdc00 ) + 0x10000;
			if( s1[ i++ ] != 0xf0 + ( c >> 18 ) )
				return -1;
			if( s1[ i++ ] != 0x80 + ( ( c >> 12 ) & 0x3f ) )
				return -1;
			if( s1[ i++ ] != 0x80 + ( ( c >> 6 ) & 0x3f ) )
				return -1;
			if( s1[ i++ ] != 0x80 + ( ( c ) & 0x3f ) )
				return -1;
			s2 += 2; // plus another 2 below
			len2 -= 2;
		}
		else if( ch >= 0xdc00 && ch < 0xe000 )
		{
			// low surrogate without a preceding high surrogate
			return -1;
		}
		else
		{
			// three UTF-8 bytes
			if( i + 2 >= len1 )
				return -1;
			if( s1[ i++ ] != 0xe0 + ( ch >> 12 ) )
				return -1;
			if( s1[ i++ ] != 0x80 + ( ( ch >> 6 ) & 0x3f ) )
				return -1;
			if( s1[ i++ ] != 0x80 + ( ( ch ) & 0x3f ) )
				return -1;
		}
		s2 += 2;
		len2 -= 2;
	}
	return i;
}

STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian( const char* s1, int len1, const char* s2, int len2 )
{
	return len1 == stbtt__CompareUTF8toUTF16_bigendian_prefix( ( const stbtt_uint8* ) s1, len1, ( const stbtt_uint8* ) s2, len2 );
}

// Looks up the first record of the font's "name" table whose platform,
// encoding, language and name IDs all equal the requested ones. On a hit,
// stores the string's byte length in *length and returns a pointer to the
// string inside the font data; returns NULL (leaving *length untouched) if
// the table or the record is missing.
// returns results in whatever encoding you request... but note that 2-byte encodings
// will be BIG-ENDIAN... use stbtt_CompareUTF8toUTF16_bigendian() to compare
STBTT_DEF const char *stbtt_GetFontNameString(const stbtt_fontinfo *font, int *length, int platformID, int encodingID, int languageID, int nameID)
{
	stbtt_uint8 *fc = font->data;
	stbtt_uint32 offset = font->fontstart;
	stbtt_uint32 nm = stbtt__find_table(fc, offset, "name");
	stbtt_int32 i, count, stringOffset;

	if (!nm)
		return NULL;

	count = ttUSHORT(fc + nm + 2);
	stringOffset = nm + ttUSHORT(fc + nm + 4);

	for (i = 0; i < count; ++i) {
		// records are 12 bytes each and start 6 bytes into the table
		stbtt_uint32 loc = nm + 6 + 12 * i;
		stbtt_uint8 *record = fc + loc;

		if (platformID != ttUSHORT(record + 0))
			continue;
		if (encodingID != ttUSHORT(record + 2))
			continue;
		if (languageID != ttUSHORT(record + 4))
			continue;
		if (nameID != ttUSHORT(record + 6))
			continue;

		*length = ttUSHORT(record + 8);
		return (const char *) (fc + stringOffset + ttUSHORT(record + 10));
	}
	return NULL;
}

// Scans the "name" table at `nm` for records with name ID `target_id` in a
// Unicode encoding and checks whether the UTF-8 string `name` (nlen bytes)
// matches. If the record is immediately followed by one with ID `next_id` and
// the same platform/encoding/language, `name` must be the first string, a
// space, and the second string (or just the first string when the second is
// empty). Otherwise `name` must equal the first string. Returns 1 on the first
// match, 0 if nothing matches.
static int stbtt__matchpair(stbtt_uint8 *fc, stbtt_uint32 nm, stbtt_uint8 *name, stbtt_int32 nlen, stbtt_int32 target_id, stbtt_int32 next_id)
{
	stbtt_int32 count = ttUSHORT(fc + nm + 2);
	stbtt_int32 stringOffset = nm + ttUSHORT(fc + nm + 4);
	stbtt_int32 i;

	for (i = 0; i < count; ++i) {
		stbtt_uint32 loc = nm + 6 + 12 * i;
		stbtt_uint8 *rec = fc + loc;
		stbtt_uint8 *next;
		stbtt_int32 platform, encoding, language, slen, off, matchlen;
		int is_unicode, paired;

		if (ttUSHORT(rec + 6) != target_id)
			continue;

		// find the encoding
		platform = ttUSHORT(rec + 0);
		encoding = ttUSHORT(rec + 2);
		language = ttUSHORT(rec + 4);

		// is this a Unicode encoding?
		is_unicode = platform == 0 || (platform == 3 && (encoding == 1 || encoding == 10));
		if (!is_unicode)
			continue; // @TODO handle other encodings

		slen = ttUSHORT(rec + 8);
		off = ttUSHORT(rec + 10);

		// check if there's a prefix match
		matchlen = stbtt__CompareUTF8toUTF16_bigendian_prefix(name, nlen, fc + stringOffset + off, slen);
		if (matchlen < 0)
			continue;

		// check for next_id immediately following, with same encoding & language
		paired = i + 1 < count;
		if (paired) {
			next = rec + 12;
			paired = ttUSHORT(next + 6) == next_id
			      && ttUSHORT(next + 0) == platform
			      && ttUSHORT(next + 2) == encoding
			      && ttUSHORT(next + 4) == language;
		}

		if (!paired) {
			// if nothing immediately following, the whole name must have matched
			if (matchlen == nlen)
				return 1;
			continue;
		}

		next = rec + 12;
		slen = ttUSHORT(next + 8);
		off = ttUSHORT(next + 10);
		if (slen == 0) {
			if (matchlen == nlen)
				return 1;
		} else if (matchlen < nlen && name[matchlen] == ' ') {
			// skip the separating space and compare the remainder exactly
			++matchlen;
			if (stbtt_CompareUTF8toUTF16_bigendian((char *) (name + matchlen), nlen - matchlen, (char *) (fc + stringOffset + off), slen))
				return 1;
		}
	}
	return 0;
}

// Returns 1 if the font at `offset` inside `fc` matches the NUL-terminated
// UTF-8 `name`, 0 otherwise.
// When `flags` is non-zero, its low three bits must equal the low three bits
// of the 16-bit value at offset 44 of the "head" table (macStyle), and `name`
// is then compared against name IDs 16, 1 and 3 on their own. When `flags` is
// zero, `name` is compared against the ID pairs 16+17 and 1+2, and against
// ID 3. A font without a "name" table never matches, and neither does one
// without a "head" table when `flags` is non-zero.
static int stbtt__matches( stbtt_uint8* fc, stbtt_uint32 offset, stbtt_uint8* name, stbtt_int32 flags )
{
	stbtt_int32 nlen = ( stbtt_int32 ) STBTT_strlen((char *) name);
	stbtt_uint32 nm, hd;
	if( !stbtt__isfont( fc + offset ) )
		return 0;

	// check italics/bold/underline flags in macStyle...
	if( flags )
	{
		hd = stbtt__find_table( fc, offset, "head" );
		// a missing table is reported as 0; reading macStyle at fc + 0 + 44
		// would compare against unrelated bytes near the start of the data
		if( !hd )
			return 0;
		if( ( ttUSHORT( fc + hd + 44 ) & 7 ) != ( flags & 7 ) )
			return 0;
	}

	nm = stbtt__find_table( fc, offset, "name" );
	if( !nm )
		return 0;

	if( flags )
	{
		// if we checked the macStyle flags, then just check the family and ignore the subfamily
		if( stbtt__matchpair( fc, nm, name, nlen, 16, -1 ) )
			return 1;
		if( stbtt__matchpair( fc, nm, name, nlen, 1, -1 ) )
			return 1;
		if( stbtt__matchpair( fc, nm, name, nlen, 3, -1 ) )
			return 1;
	}
	else
	{
		// no style flags given: the name has to include the subfamily
		if( stbtt__matchpair( fc, nm, name, nlen, 16, 17 ) )
			return 1;
		if( stbtt__matchpair( fc, nm, name, nlen, 1, 2 ) )
			return 1;
		if( stbtt__matchpair( fc, nm, name, nlen, 3, -1 ) )
			return 1;
	}

	return 0;
}

// Walks the fonts of `font_collection` by index and returns the offset of the
// first one that matches `name_utf8` and `flags`. When the index runs past
// the last font, stbtt_GetFontOffsetForIndex yields a negative value and that
// value is returned.
STBTT_DEF int stbtt_FindMatchingFont(const unsigned char *font_collection, const char *name_utf8, stbtt_int32 flags)
{
	stbtt_int32 index = 0;
	stbtt_int32 off;

	while ((off = stbtt_GetFontOffsetForIndex(font_collection, index)) >= 0) {
		if (stbtt__matches((stbtt_uint8 *) font_collection, off, (stbtt_uint8 *) name_utf8, flags))
			return off;
		++index;
	}
	return off; // negative: no font in the collection matched
}

#endif // STB_TRUETYPE_IMPLEMENTATION

// FULL VERSION HISTORY
//
//   1.10 (2016-04-02) allow user-defined fabs() replacement
//                     fix memory leak if fontsize=0.0
//                     fix warning from duplicate typedef
//   1.09 (2016-01-16) warning fix; avoid crash on outofmem; use alloc userdata for PackFontRanges
//   1.08 (2015-09-13) document stbtt_Rasterize(); fixes for vertical & horizontal edges
//   1.07 (2015-08-01) allow PackFontRanges to accept arrays of sparse codepoints;
//                     allow PackFontRanges to pack and render in separate phases;
//                     fix stbtt_GetFontOffsetForIndex (never worked for non-0 input?);
//                     fixed an assert() bug in the new rasterizer
//                     replace assert() with STBTT_assert() in new rasterizer
//   1.06 (2015-07-14) performance improvements (~35% faster on x86 and x64 on test machine)
//                     also more precise AA rasterizer, except if shapes overlap
//                     remove need for STBTT_sort
//   1.05 (2015-04-15) fix misplaced definitions for STBTT_STATIC
//   1.04 (2015-04-15) typo in example
//   1.03 (2015-04-12) STBTT_STATIC, fix memory leak in new packing, various fixes
//   1.02 (2014-12-10) fix various warnings & compile issues w/ stb_rect_pack, C++
//   1.01 (2014-12-08) fix subpixel position when oversampling to exactly match
//                        non-oversampled; STBTT_POINT_SIZE for packed case only
//   1.00 (2014-12-06) add new PackBegin etc. API, w/ support for oversampling
//   0.99 (2014-09-18) fix multiple bugs with subpixel rendering (ryg)
//   0.9  (2014-08-07) support certain mac/iOS fonts without an MS platformID
//   0.8b (2014-07-07) fix a warning
//   0.8  (2014-05-25) fix a few more warnings
//   0.7  (2013-09-25) bugfix: subpixel glyph bug fixed in 0.5 had come back
//   0.6c (2012-07-24) improve documentation
//   0.6b (2012-07-20) fix a few more warnings
//   0.6  (2012-07-17) fix warnings; added stbtt_ScaleForMappingEmToPixels,
//                        stbtt_GetFontBoundingBox, stbtt_IsGlyphEmpty
//   0.5  (2011-12-09) bugfixes:
//                        subpixel glyph renderer computed wrong bounding box
//                        first vertex of shape can be off-curve (FreeSans)
//   0.4b (2011-12-03) fixed an error in the font baking example
//   0.4  (2011-12-01) kerning, subpixel rendering (tor)
//                    bugfixes for:
//                        codepoint-to-glyph conversion using table fmt=12
//                        codepoint-to-glyph conversion using table fmt=4
//                        stbtt_GetBakedQuad with non-square texture (Zer)
//                    updated Hello World! sample to use kerning and subpixel
//                    fixed some warnings
//   0.3  (2009-06-24) cmap fmt=12, compound shapes (MM)
//                    userdata, malloc-from-userdata, non-zero fill (stb)
//   0.2  (2009-03-11) Fix unsigned/signed char warnings
//   0.1  (2009-03-09) First public release
//
